root / lib / cmdlib.py @ 8a69b3a8

#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools
import operator

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht
from ganeti import rpc

import ganeti.masterd.instance # pylint: disable=W0611


#: Size of DRBD meta block device
DRBD_META_SIZE = 128

# States of instance
INSTANCE_UP = [constants.ADMINST_UP]
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and the
    # "could be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, qfilter, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)

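# Illustrative usage sketch (not part of the original module): an LU that only
# reads data, and can therefore share every lock level, might declare in its
# ExpandNames something like:
#
#   self.share_locks = _ShareAll()
#   self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}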

def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })

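# Illustrative example (assumed dict shapes, not from the original module):
# given
#   data = ("boot-id", ({"vg_size": 102400, "vg_free": 51200},),
#           ({"memory_total": 4096},))
# _MakeLegacyNodeInfo(data) merges everything into one flat dict:
#   {"vg_size": 102400, "vg_free": 51200, "memory_total": 4096,
#    "bootid": "boot-id"}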

def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy

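# Illustrative behaviour (sketch, not part of the original module):
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"b": constants.VALUE_DEFAULT, "c": 3})
# returns {"a": 1, "c": 3}: "b" is reset to its default by being dropped from
# the dict, while "c" is added.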

def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
         "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"

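# Illustrative usage sketch (hypothetical call, not from the original module):
# once an LU has narrowed its work down to a single node, it could drop all
# other node locks with
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])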

def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)

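# Illustrative result (sketch, assumed names): for an instance "inst1" whose
# only volume "xenvg/disk0" lives on node "node1", the returned dict would be
#   {("node1", "xenvg/disk0"): "inst1"}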

def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
                               (instance, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env

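# Illustrative output (sketch): for a one-NIC, one-disk instance the returned
# environment contains keys such as INSTANCE_NAME, INSTANCE_PRIMARY,
# INSTANCE_NIC_COUNT, INSTANCE_NIC0_MAC, INSTANCE_DISK0_SIZE and
# INSTANCE_DISK_COUNT, plus one INSTANCE_BE_*/INSTANCE_HV_* entry per
# backend/hypervisor parameter.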

def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")

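# Illustrative usage sketch (hypothetical slot names, not from the original
# module): an LU whose opcode has "iallocator" and "remote_node" slots would
# typically call this from CheckArguments as
#   _CheckIAllocatorOrNode(self, "iallocator", "remote_node")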
1302

    
1303
def _GetDefaultIAllocator(cfg, iallocator):
1304
  """Decides on which iallocator to use.
1305

1306
  @type cfg: L{config.ConfigWriter}
1307
  @param cfg: Cluster configuration object
1308
  @type iallocator: string or None
1309
  @param iallocator: Iallocator specified in opcode
1310
  @rtype: string
1311
  @return: Iallocator name
1312

1313
  """
1314
  if not iallocator:
1315
    # Use default iallocator
1316
    iallocator = cfg.GetDefaultIAllocator()
1317

    
1318
  if not iallocator:
1319
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1320
                               " opcode nor as a cluster-wide default",
1321
                               errors.ECODE_INVAL)
1322

    
1323
  return iallocator
1324

    
1325

    
1326
class LUClusterPostInit(LogicalUnit):
1327
  """Logical unit for running hooks after cluster initialization.
1328

1329
  """
1330
  HPATH = "cluster-init"
1331
  HTYPE = constants.HTYPE_CLUSTER
1332

    
1333
  def BuildHooksEnv(self):
1334
    """Build hooks env.
1335

1336
    """
1337
    return {
1338
      "OP_TARGET": self.cfg.GetClusterName(),
1339
      }
1340

    
1341
  def BuildHooksNodes(self):
1342
    """Build hooks nodes.
1343

1344
    """
1345
    return ([], [self.cfg.GetMasterNode()])
1346

    
1347
  def Exec(self, feedback_fn):
1348
    """Nothing to do.
1349

1350
    """
1351
    return True
1352

    
1353

    
1354
class LUClusterDestroy(LogicalUnit):
1355
  """Logical unit for destroying the cluster.
1356

1357
  """
1358
  HPATH = "cluster-destroy"
1359
  HTYPE = constants.HTYPE_CLUSTER
1360

    
1361
  def BuildHooksEnv(self):
1362
    """Build hooks env.
1363

1364
    """
1365
    return {
1366
      "OP_TARGET": self.cfg.GetClusterName(),
1367
      }
1368

    
1369
  def BuildHooksNodes(self):
1370
    """Build hooks nodes.
1371

1372
    """
1373
    return ([], [])
1374

    
1375
  def CheckPrereq(self):
1376
    """Check prerequisites.
1377

1378
    This checks whether the cluster is empty.
1379

1380
    Any errors are signaled by raising errors.OpPrereqError.
1381

1382
    """
1383
    master = self.cfg.GetMasterNode()
1384

    
1385
    nodelist = self.cfg.GetNodeList()
1386
    if len(nodelist) != 1 or nodelist[0] != master:
1387
      raise errors.OpPrereqError("There are still %d node(s) in"
1388
                                 " this cluster." % (len(nodelist) - 1),
1389
                                 errors.ECODE_INVAL)
1390
    instancelist = self.cfg.GetInstanceList()
1391
    if instancelist:
1392
      raise errors.OpPrereqError("There are still %d instance(s) in"
1393
                                 " this cluster." % len(instancelist),
1394
                                 errors.ECODE_INVAL)
1395

    
1396
  def Exec(self, feedback_fn):
1397
    """Destroys the cluster.
1398

1399
    """
1400
    master_params = self.cfg.GetMasterNetworkParameters()
1401

    
1402
    # Run post hooks on master node before it's removed
1403
    _RunPostHook(self, master_params.name)
1404

    
1405
    ems = self.cfg.GetUseExternalMipScript()
1406
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1407
                                                     master_params, ems)
1408
    result.Raise("Could not disable the master role")
1409

    
1410
    return master_params.name
1411

    
1412

    
1413
def _VerifyCertificate(filename):
1414
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1415

1416
  @type filename: string
1417
  @param filename: Path to PEM file
1418

1419
  """
1420
  try:
1421
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1422
                                           utils.ReadFile(filename))
1423
  except Exception, err: # pylint: disable=W0703
1424
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1425
            "Failed to load X509 certificate %s: %s" % (filename, err))
1426

    
1427
  (errcode, msg) = \
1428
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1429
                                constants.SSL_CERT_EXPIRATION_ERROR)
1430

    
1431
  if msg:
1432
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1433
  else:
1434
    fnamemsg = None
1435

    
1436
  if errcode is None:
1437
    return (None, fnamemsg)
1438
  elif errcode == utils.CERT_WARNING:
1439
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1440
  elif errcode == utils.CERT_ERROR:
1441
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1442

    
1443
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1444

    
1445

    
1446
def _GetAllHypervisorParameters(cluster, instances):
1447
  """Compute the set of all hypervisor parameters.
1448

1449
  @type cluster: L{objects.Cluster}
1450
  @param cluster: the cluster object
1451
  @param instances: list of L{objects.Instance}
1452
  @param instances: additional instances from which to obtain parameters
1453
  @rtype: list of (origin, hypervisor, parameters)
1454
  @return: a list with all parameters found, indicating the hypervisor they
1455
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1456

1457
  """
1458
  hvp_data = []
1459

    
1460
  for hv_name in cluster.enabled_hypervisors:
1461
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1462

    
1463
  for os_name, os_hvp in cluster.os_hvp.items():
1464
    for hv_name, hv_params in os_hvp.items():
1465
      if hv_params:
1466
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1467
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1468

    
1469
  # TODO: collapse identical parameter values in a single one
1470
  for instance in instances:
1471
    if instance.hvparams:
1472
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1473
                       cluster.FillHV(instance)))
1474

    
1475
  return hvp_data
1476

    
1477

    
1478
class _VerifyErrors(object):
1479
  """Mix-in for cluster/group verify LUs.
1480

1481
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1482
  self.op and self._feedback_fn to be available.)
1483

1484
  """
1485

    
1486
  ETYPE_FIELD = "code"
1487
  ETYPE_ERROR = "ERROR"
1488
  ETYPE_WARNING = "WARNING"
1489

    
1490
  def _Error(self, ecode, item, msg, *args, **kwargs):
1491
    """Format an error message.
1492

1493
    Based on the opcode's error_codes parameter, either format a
1494
    parseable error code, or a simpler error string.
1495

1496
    This must be called only from Exec and functions called from Exec.
1497

1498
    """
1499
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1500
    itype, etxt, _ = ecode
1501
    # first complete the msg
1502
    if args:
1503
      msg = msg % args
1504
    # then format the whole message
1505
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1506
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1507
    else:
1508
      if item:
1509
        item = " " + item
1510
      else:
1511
        item = ""
1512
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1513
    # and finally report it via the feedback_fn
1514
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1515

    
1516
  def _ErrorIf(self, cond, ecode, *args, **kwargs):
1517
    """Log an error message if the passed condition is True.
1518

1519
    """
1520
    cond = (bool(cond)
1521
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1522

    
1523
    # If the error code is in the list of ignored errors, demote the error to a
1524
    # warning
1525
    (_, etxt, _) = ecode
1526
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1527
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1528

    
1529
    if cond:
1530
      self._Error(ecode, *args, **kwargs)
1531

    
1532
    # do not mark the operation as failed for WARN cases only
1533
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1534
      self.bad = self.bad or cond
1535

    
1536

    
1537
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                            ignore_errors=self.op.ignore_errors,
                                            depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


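# Sketch of the job list built by LUClusterVerify.Exec above for a cluster
# with two node groups (group names are hypothetical): one job holding
# OpClusterVerifyConfig, then one OpClusterVerifyGroup job per group, each
# depending on the config-verification job via a relative job id:
#   [[OpClusterVerifyConfig(...)],
#    [OpClusterVerifyGroup(group_name="default", depends=[(-1, [])], ...)],
#    [OpClusterVerifyGroup(group_name="storage", depends=[(-2, [])], ...)]]
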
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node,
         utils.CommaJoin(dangling_instances.get(node, ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

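  # Illustrative sketch (hypothetical values): after Exec() has gathered the
  # runtime data, a NodeImage for a healthy secondary node might hold
  #   img.name = "node2.example.com"; img.pinst = []; img.sinst = ["web1"]
  #   img.sbp = {"node1.example.com": ["web1"]}; img.mfree = 4096
  # while a node whose verify RPC failed keeps rpc_fail = True and empty
  # runtime fields.
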
  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, constants.CV_ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  constants.CV_ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, constants.CV_ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, constants.CV_ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

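  # Worked example for the skew check above (numbers are hypothetical):
  # assuming NODE_MAX_CLOCK_SKEW = 150s, nvinfo_starttime = 1000.0,
  # nvinfo_endtime = 1002.0 and a node reporting ntime_merged = 820.0, the
  # first branch fires because 820.0 < 1000.0 - 150, and the reported
  # divergence is "%.01fs" % abs(1000.0 - 820.0) == "180.0s".
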
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, constants.CV_ENODELVM, node,
                 "Invalid character ':' in PV '%s' of VG '%s'",
                 pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, constants.CV_ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))

  def _VerifyNodeUserScripts(self, ninfo, nresult):
    """Check the results of user scripts presence and executability on the node

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name

    test = not constants.NV_USERSCRIPTS in nresult
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
                  "did not return user scripts information")

    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
    if not test:
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
                    "user scripts not present or not executable: %s" %
                    utils.CommaJoin(sorted(broken_scripts)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, constants.CV_ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, constants.CV_ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, constants.CV_ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, constants.CV_ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_state == constants.ADMINST_UP:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
               not success and not bad_snode,
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      #TODO(dynmem): use MINMEM for checking
      #TODO(dynmem): also consider ballooning out other instances
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MAXMEM]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, constants.CV_ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

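  # Worked example for the N+1 check above (hypothetical figures): if node2 is
  # secondary for instances "web1" (maxmem 2048 MiB) and "db1" (maxmem 4096
  # MiB) whose primary is node1, and both have auto_balance enabled, then
  # needed_mem for the (node2, node1) pair is 6144 MiB; the check fails when
  # node2 reports mfree below that, e.g. mfree = 4096.
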
  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_mc | files_vm)

    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_opt:
        # All or no nodes
        errorif(missing_file and missing_file != expected_nodes,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

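  # Illustrative shape of the structures built in _VerifyFiles above
  # (hypothetical paths and checksums): nodefiles maps each tracked file to
  # the nodes expected to hold it, e.g.
  #   {"/var/lib/ganeti/config.data": frozenset(["node1", "node2"])}
  # and fileinfo maps each file to {checksum: set(nodes reporting it)}, e.g.
  #   {"/var/lib/ganeti/config.data": {"1df6a2...": set(["node1", "node2"])}}
  # More than one checksum key for a file means the nodes disagree.
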
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result == None)
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name,
                            instance.admin_state == constants.ADMINST_UP)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

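  # Example of the node_drbd mapping computed above (minor numbers and names
  # are hypothetical): {0: ("web1", True), 1: ("db1", False)} means minor 0
  # belongs to instance web1 which is administratively up, so it must appear
  # in the node's used_minors; minor 1 belongs to db1 which is down, so its
  # absence is fine, but any used minor not present in node_drbd at all is
  # reported as unallocated.
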
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, constants.CV_ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, constants.CV_ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, constants.CV_ENODELVM, node,
               "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                  "rpc call to node failed (instancelist): %s",
                  utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, constants.CV_ENODEHV, node,
             "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, constants.CV_ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

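  # Illustrative instdisk result (hypothetical names): for a DRBD instance
  # "web1" on nodes node1/node2 with two disks, the mapping returned above
  # looks like
  #   {"web1": {"node1": [(True, <status>), (True, <status>)],
  #             "node2": [(False, "node offline"), (False, "node offline")]}}
  # and a diskless instance maps to an empty dict.
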
  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])

  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))

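  # Sketch of the value returned above for a two-group cluster (names are
  # hypothetical): for the group being verified with online nodes ["node1",
  # "node2"] and one other group containing ["node3", "node4"], the result is
  #   (["node1", "node2"],
  #    {"node1": ["node3"], "node2": ["node4"]})
  # i.e. every online node of this group plus one rotating contact per other
  # group, thanks to the itertools.cycle iterators from _SshNodeSelector.
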
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just run in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

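  # Example hooks environment produced above (tags are hypothetical):
  #   {"CLUSTER_TAGS": "prod critical",
  #    "NODE_TAGS_node1.example.com": "rack1",
  #    "NODE_TAGS_node2.example.com": ""}
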
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
2675
    """Verify integrity of the node group, performing various test on nodes.
2676

2677
    """
2678
    # This method has too many local variables. pylint: disable=R0914
2679
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2680

    
2681
    if not self.my_node_names:
2682
      # empty node group
2683
      feedback_fn("* Empty node group, skipping verification")
2684
      return True
2685

    
2686
    self.bad = False
2687
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2688
    verbose = self.op.verbose
2689
    self._feedback_fn = feedback_fn
2690

    
2691
    vg_name = self.cfg.GetVGName()
2692
    drbd_helper = self.cfg.GetDRBDHelper()
2693
    cluster = self.cfg.GetClusterInfo()
2694
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2695
    hypervisors = cluster.enabled_hypervisors
2696
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2697

    
2698
    i_non_redundant = [] # Non redundant instances
2699
    i_non_a_balanced = [] # Non auto-balanced instances
2700
    i_offline = 0 # Count of offline instances
2701
    n_offline = 0 # Count of offline nodes
2702
    n_drained = 0 # Count of nodes being drained
2703
    node_vol_should = {}
2704

    
2705
    # FIXME: verify OS list
2706

    
2707
    # File verification
2708
    filemap = _ComputeAncillaryFiles(cluster, False)
2709

    
2710
    # do local checksums
2711
    master_node = self.master_node = self.cfg.GetMasterNode()
2712
    master_ip = self.cfg.GetMasterIP()
2713

    
2714
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2715

    
2716
    user_scripts = []
2717
    if self.cfg.GetUseExternalMipScript():
2718
      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2719

    
2720
    node_verify_param = {
2721
      constants.NV_FILELIST:
2722
        utils.UniqueSequence(filename
2723
                             for files in filemap
2724
                             for filename in files),
2725
      constants.NV_NODELIST:
2726
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2727
                                  self.all_node_info.values()),
2728
      constants.NV_HYPERVISOR: hypervisors,
2729
      constants.NV_HVPARAMS:
2730
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2731
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2732
                                 for node in node_data_list
2733
                                 if not node.offline],
2734
      constants.NV_INSTANCELIST: hypervisors,
2735
      constants.NV_VERSION: None,
2736
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2737
      constants.NV_NODESETUP: None,
2738
      constants.NV_TIME: None,
2739
      constants.NV_MASTERIP: (master_node, master_ip),
2740
      constants.NV_OSLIST: None,
2741
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2742
      constants.NV_USERSCRIPTS: user_scripts,
2743
      }
2744

    
2745
    if vg_name is not None:
2746
      node_verify_param[constants.NV_VGLIST] = None
2747
      node_verify_param[constants.NV_LVLIST] = vg_name
2748
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2749
      node_verify_param[constants.NV_DRBDLIST] = None
2750

    
2751
    if drbd_helper:
2752
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2753

    
2754
    # bridge checks
2755
    # FIXME: this needs to be changed per node-group, not cluster-wide
2756
    bridges = set()
2757
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2758
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2759
      bridges.add(default_nicpp[constants.NIC_LINK])
2760
    for instance in self.my_inst_info.values():
2761
      for nic in instance.nics:
2762
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2763
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2764
          bridges.add(full_nic[constants.NIC_LINK])
2765

    
2766
    if bridges:
2767
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2768

    
2769
    # Build our expected cluster state
2770
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2771
                                                 name=node.name,
2772
                                                 vm_capable=node.vm_capable))
2773
                      for node in node_data_list)
2774

    
2775
    # Gather OOB paths
2776
    oob_paths = []
2777
    for node in self.all_node_info.values():
2778
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeUserScripts(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          # FIXME: investigate best way to handle offline insts
          if inst.admin_state == constants.ADMINST_OFFLINE:
            if verbose:
              feedback_fn("* Skipping offline instance %s" % inst.name)
            i_offline += 1
            continue
          test = inst in self.all_inst_info
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
               pnode_img.offline,
               constants.CV_EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               constants.CV_EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1,
                      constants.CV_EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
                 snode, "instance %s, connection to secondary node failed",
                 instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if i_offline:
      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the status of the cluster disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
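    # Illustrative shape of the returned tuple (names and values invented):
    #   ({"node2.example.com": "rpc failure message"},
    #    ["instance1.example.com"],
    #    {"instance2.example.com": [["node3.example.com", "xenvg/disk0"]]})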
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
            for inst in self.instances.values()
            if inst.admin_state == constants.ADMINST_UP])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disk sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # the reported size is in bytes; shift by 20 bits to compare against
        # the recorded disk.size, which is kept in mebibytes
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                                (netmask))
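
# Illustrative note: on a cluster using IPv4 as its primary address family,
# a call such as _ValidateNetmask(cfg, 24) passes silently, while a prefix
# length outside the valid IPv4 range (e.g. 33) fails the ValidateNetmask()
# check above and an OpPrereqError is raised.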


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

    if self.op.diskparams:
      for dt_params in self.op.diskparams.values():
        utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # disk template parameters
    self.new_diskparams = objects.FillDict(cluster.diskparams, {})
    if self.op.diskparams:
      for dt_name, dt_params in self.op.diskparams.items():
        if dt_name not in self.new_diskparams:
          self.new_diskparams[dt_name] = dt_params
        else:
          self.new_diskparams[dt_name].update(dt_params)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams
    if self.op.diskparams:
      self.cluster.diskparams = self.new_diskparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
        feedback_fn("Note: CONFD was disabled at build time, node health"
                    " maintenance is not useful (still enabling it)")
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)

      self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
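  # The return value is a tuple of four sets, (files_all, files_opt,
  # files_mc, files_vm) -- see the return statement at the end of this
  # function; every entry in files_opt must also appear in one of the other
  # categories, which is asserted below.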
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
         "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
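  # Note: with oneshot=True the sync status is sampled once (plus the short
  # degraded-retry loop below) instead of waiting until all mirrors report
  # being fully synchronised.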
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
4283
          if self.op.command in (constants.OOB_POWER_ON,
4284
                                 constants.OOB_POWER_OFF):
4285
            self.cfg.Update(node, feedback_fn)
4286

    
4287
          node_entry.append((constants.RS_NORMAL, result.payload))
4288

    
4289
          if (self.op.command == constants.OOB_POWER_ON and
4290
              idx < len(self.nodes) - 1):
4291
            time.sleep(self.op.power_delay)
4292

    
4293
    return ret
4294

    
4295
  def _CheckPayload(self, result):
4296
    """Checks if the payload is valid.
4297

4298
    @param result: RPC result
4299
    @raises errors.OpExecError: If payload is not valid
4300

4301
    """
4302
    errs = []
4303
    if self.op.command == constants.OOB_HEALTH:
4304
      if not isinstance(result.payload, list):
4305
        errs.append("command 'health' is expected to return a list but got %s" %
4306
                    type(result.payload))
4307
      else:
4308
        for item, status in result.payload:
4309
          if status not in constants.OOB_STATUSES:
4310
            errs.append("health item '%s' has invalid status '%s'" %
4311
                        (item, status))
4312

    
4313
    if self.op.command == constants.OOB_POWER_STATUS:
4314
      if not isinstance(result.payload, dict):
4315
        errs.append("power-status is expected to return a dict but got %s" %
4316
                    type(result.payload))
4317

    
4318
    if self.op.command in [
4319
        constants.OOB_POWER_ON,
4320
        constants.OOB_POWER_OFF,
4321
        constants.OOB_POWER_CYCLE,
4322
        ]:
4323
      if result.payload is not None:
4324
        errs.append("%s is expected to not return payload but got '%s'" %
4325
                    (self.op.command, result.payload))
4326

    
4327
    if errs:
4328
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4329
                               utils.CommaJoin(errs))
4330

    
4331

    
4332
class _OsQuery(_QueryBase):
4333
  FIELDS = query.OS_FIELDS
4334

    
4335
  def ExpandNames(self, lu):
4336
    # Lock all nodes in shared mode
4337
    # Temporary removal of locks, should be reverted later
4338
    # TODO: reintroduce locks when they are lighter-weight
4339
    lu.needed_locks = {}
4340
    #self.share_locks[locking.LEVEL_NODE] = 1
4341
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4342

    
4343
    # The following variables interact with _QueryBase._GetNames
4344
    if self.names:
4345
      self.wanted = self.names
4346
    else:
4347
      self.wanted = locking.ALL_SET
4348

    
4349
    self.do_locking = self.use_locking
4350

    
4351
  def DeclareLocks(self, lu, level):
4352
    pass
4353

    
4354
  @staticmethod
4355
  def _DiagnoseByOS(rlist):
4356
    """Remaps a per-node return list into an a per-os per-node dictionary
4357

4358
    @param rlist: a map with node names as keys and OS objects as values
4359

4360
    @rtype: dict
4361
    @return: a dictionary with osnames as keys and as value another
4362
        map, with nodes as keys and tuples of (path, status, diagnose,
4363
        variants, parameters, api_versions) as values, eg::
4364

4365
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4366
                                     (/srv/..., False, "invalid api")],
4367
                           "node2": [(/srv/..., True, "", [], [])]}
4368
          }
4369

4370
    """
4371
    all_os = {}
4372
    # we build here the list of nodes that didn't fail the RPC (at RPC
4373
    # level), so that nodes with a non-responding node daemon don't
4374
    # make all OSes invalid
4375
    good_nodes = [node_name for node_name in rlist
4376
                  if not rlist[node_name].fail_msg]
4377
    for node_name, nr in rlist.items():
4378
      if nr.fail_msg or not nr.payload:
4379
        continue
4380
      for (name, path, status, diagnose, variants,
4381
           params, api_versions) in nr.payload:
4382
        if name not in all_os:
4383
          # build a list of nodes for this os containing empty lists
4384
          # for each node in node_list
4385
          all_os[name] = {}
4386
          for nname in good_nodes:
4387
            all_os[name][nname] = []
4388
        # convert params from [name, help] to (name, help)
4389
        params = [tuple(v) for v in params]
4390
        all_os[name][node_name].append((path, status, diagnose,
4391
                                        variants, params, api_versions))
4392
    return all_os
4393

    
4394
  def _GetQueryData(self, lu):
4395
    """Computes the list of nodes and their attributes.
4396

4397
    """
4398
    # Locking is not used
4399
    assert not (compat.any(lu.glm.is_owned(level)
4400
                           for level in locking.LEVELS
4401
                           if level != locking.LEVEL_CLUSTER) or
4402
                self.do_locking or self.use_locking)
4403

    
4404
    valid_nodes = [node.name
4405
                   for node in lu.cfg.GetAllNodesInfo().values()
4406
                   if not node.offline and node.vm_capable]
4407
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4408
    cluster = lu.cfg.GetClusterInfo()
4409

    
4410
    data = {}
4411

    
4412
    for (os_name, os_data) in pol.items():
4413
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4414
                          hidden=(os_name in cluster.hidden_os),
4415
                          blacklisted=(os_name in cluster.blacklisted_os))
4416

    
4417
      variants = set()
4418
      parameters = set()
4419
      api_versions = set()
4420

    
4421
      for idx, osl in enumerate(os_data.values()):
4422
        info.valid = bool(info.valid and osl and osl[0][1])
4423
        if not info.valid:
4424
          break
4425

    
4426
        (node_variants, node_params, node_api) = osl[0][3:6]
4427
        if idx == 0:
4428
          # First entry
4429
          variants.update(node_variants)
4430
          parameters.update(node_params)
4431
          api_versions.update(node_api)
4432
        else:
4433
          # Filter out inconsistent values
4434
          variants.intersection_update(node_variants)
4435
          parameters.intersection_update(node_params)
4436
          api_versions.intersection_update(node_api)
4437

    
4438
      info.variants = list(variants)
4439
      info.parameters = list(parameters)
4440
      info.api_versions = list(api_versions)
4441

    
4442
      data[os_name] = info
4443

    
4444
    # Prepare data in requested order
4445
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4446
            if name in data]
4447

    
4448

    
4449
class LUOsDiagnose(NoHooksLU):
4450
  """Logical unit for OS diagnose/query.
4451

4452
  """
4453
  REQ_BGL = False
4454

    
4455
  @staticmethod
4456
  def _BuildFilter(fields, names):
4457
    """Builds a filter for querying OSes.
4458

4459
    """
4460
    name_filter = qlang.MakeSimpleFilter("name", names)
4461

    
4462
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4463
    # respective field is not requested
4464
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4465
                     for fname in ["hidden", "blacklisted"]
4466
                     if fname not in fields]
4467
    if "valid" not in fields:
4468
      status_filter.append([qlang.OP_TRUE, "valid"])
4469

    
4470
    if status_filter:
4471
      status_filter.insert(0, qlang.OP_AND)
4472
    else:
4473
      status_filter = None
4474

    
4475
    if name_filter and status_filter:
4476
      return [qlang.OP_AND, name_filter, status_filter]
4477
    elif name_filter:
4478
      return name_filter
4479
    else:
4480
      return status_filter
4481

    
4482
  def CheckArguments(self):
4483
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4484
                       self.op.output_fields, False)
4485

    
4486
  def ExpandNames(self):
4487
    self.oq.ExpandNames(self)
4488

    
4489
  def Exec(self, feedback_fn):
4490
    return self.oq.OldStyleQuery(self)
4491

    
4492

    
4493
class LUNodeRemove(LogicalUnit):
4494
  """Logical unit for removing a node.
4495

4496
  """
4497
  HPATH = "node-remove"
4498
  HTYPE = constants.HTYPE_NODE
4499

    
4500
  def BuildHooksEnv(self):
4501
    """Build hooks env.
4502

4503
    This doesn't run on the target node in the pre phase as a failed
4504
    node would then be impossible to remove.
4505

4506
    """
4507
    return {
4508
      "OP_TARGET": self.op.node_name,
4509
      "NODE_NAME": self.op.node_name,
4510
      }
4511

    
4512
  def BuildHooksNodes(self):
4513
    """Build hooks nodes.
4514

4515
    """
4516
    all_nodes = self.cfg.GetNodeList()
4517
    try:
4518
      all_nodes.remove(self.op.node_name)
4519
    except ValueError:
4520
      logging.warning("Node '%s', which is about to be removed, was not found"
4521
                      " in the list of all nodes", self.op.node_name)
4522
    return (all_nodes, all_nodes)
4523

    
4524
  def CheckPrereq(self):
4525
    """Check prerequisites.
4526

4527
    This checks:
4528
     - the node exists in the configuration
4529
     - it does not have primary or secondary instances
4530
     - it's not the master
4531

4532
    Any errors are signaled by raising errors.OpPrereqError.
4533

4534
    """
4535
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4536
    node = self.cfg.GetNodeInfo(self.op.node_name)
4537
    assert node is not None
4538

    
4539
    masternode = self.cfg.GetMasterNode()
4540
    if node.name == masternode:
4541
      raise errors.OpPrereqError("Node is the master node, failover to another"
4542
                                 " node is required", errors.ECODE_INVAL)
4543

    
4544
    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4545
      if node.name in instance.all_nodes:
4546
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4547
                                   " please remove first" % instance_name,
4548
                                   errors.ECODE_INVAL)
4549
    self.op.node_name = node.name
4550
    self.node = node
4551

    
4552
  def Exec(self, feedback_fn):
4553
    """Removes the node from the cluster.
4554

4555
    """
4556
    node = self.node
4557
    logging.info("Stopping the node daemon and removing configs from node %s",
4558
                 node.name)
4559

    
4560
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4561

    
4562
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4563
      "Not owning BGL"
4564

    
4565
    # Promote nodes to master candidate as needed
4566
    _AdjustCandidatePool(self, exceptions=[node.name])
4567
    self.context.RemoveNode(node.name)
4568

    
4569
    # Run post hooks on the node before it's removed
4570
    _RunPostHook(self, node.name)
4571

    
4572
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4573
    msg = result.fail_msg
4574
    if msg:
4575
      self.LogWarning("Errors encountered on the remote node while leaving"
4576
                      " the cluster: %s", msg)
4577

    
4578
    # Remove node from our /etc/hosts
4579
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4580
      master_node = self.cfg.GetMasterNode()
4581
      result = self.rpc.call_etc_hosts_modify(master_node,
4582
                                              constants.ETC_HOSTS_REMOVE,
4583
                                              node.name, None)
4584
      result.Raise("Can't update hosts file with new host data")
4585
      _RedistributeAncillaryFiles(self)
4586

    
4587

    
4588
class _NodeQuery(_QueryBase):
4589
  FIELDS = query.NODE_FIELDS
4590

    
4591
  def ExpandNames(self, lu):
4592
    lu.needed_locks = {}
4593
    lu.share_locks = _ShareAll()
4594

    
4595
    if self.names:
4596
      self.wanted = _GetWantedNodes(lu, self.names)
4597
    else:
4598
      self.wanted = locking.ALL_SET
4599

    
4600
    self.do_locking = (self.use_locking and
4601
                       query.NQ_LIVE in self.requested_data)
4602

    
4603
    if self.do_locking:
4604
      # If any non-static field is requested we need to lock the nodes
4605
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4606

    
4607
  def DeclareLocks(self, lu, level):
4608
    pass
4609

    
4610
  def _GetQueryData(self, lu):
4611
    """Computes the list of nodes and their attributes.
4612

4613
    """
4614
    all_info = lu.cfg.GetAllNodesInfo()
4615

    
4616
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4617

    
4618
    # Gather data as requested
4619
    if query.NQ_LIVE in self.requested_data:
4620
      # filter out non-vm_capable nodes
4621
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4622

    
4623
      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4624
                                        [lu.cfg.GetHypervisorType()])
4625
      live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4626
                       for (name, nresult) in node_data.items()
4627
                       if not nresult.fail_msg and nresult.payload)
4628
    else:
4629
      live_data = None
4630

    
4631
    if query.NQ_INST in self.requested_data:
4632
      node_to_primary = dict([(name, set()) for name in nodenames])
4633
      node_to_secondary = dict([(name, set()) for name in nodenames])
4634

    
4635
      inst_data = lu.cfg.GetAllInstancesInfo()
4636

    
4637
      for inst in inst_data.values():
4638
        if inst.primary_node in node_to_primary:
4639
          node_to_primary[inst.primary_node].add(inst.name)
4640
        for secnode in inst.secondary_nodes:
4641
          if secnode in node_to_secondary:
4642
            node_to_secondary[secnode].add(inst.name)
4643
    else:
4644
      node_to_primary = None
4645
      node_to_secondary = None
4646

    
4647
    if query.NQ_OOB in self.requested_data:
4648
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4649
                         for name, node in all_info.iteritems())
4650
    else:
4651
      oob_support = None
4652

    
4653
    if query.NQ_GROUP in self.requested_data:
4654
      groups = lu.cfg.GetAllNodeGroupsInfo()
4655
    else:
4656
      groups = {}
4657

    
4658
    return query.NodeQueryData([all_info[name] for name in nodenames],
4659
                               live_data, lu.cfg.GetMasterNode(),
4660
                               node_to_primary, node_to_secondary, groups,
4661
                               oob_support, lu.cfg.GetClusterInfo())
4662

    
4663

    
4664
class LUNodeQuery(NoHooksLU):
4665
  """Logical unit for querying nodes.
4666

4667
  """
4668
  # pylint: disable=W0142
4669
  REQ_BGL = False
4670

    
4671
  def CheckArguments(self):
4672
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4673
                         self.op.output_fields, self.op.use_locking)
4674

    
4675
  def ExpandNames(self):
4676
    self.nq.ExpandNames(self)
4677

    
4678
  def DeclareLocks(self, level):
4679
    self.nq.DeclareLocks(self, level)
4680

    
4681
  def Exec(self, feedback_fn):
4682
    return self.nq.OldStyleQuery(self)
4683

    
4684

    
4685
class LUNodeQueryvols(NoHooksLU):
4686
  """Logical unit for getting volumes on node(s).
4687

4688
  """
4689
  REQ_BGL = False
4690
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4691
  _FIELDS_STATIC = utils.FieldSet("node")
4692

    
4693
  def CheckArguments(self):
4694
    _CheckOutputFields(static=self._FIELDS_STATIC,
4695
                       dynamic=self._FIELDS_DYNAMIC,
4696
                       selected=self.op.output_fields)
4697

    
4698
  def ExpandNames(self):
4699
    self.share_locks = _ShareAll()
4700
    self.needed_locks = {}
4701

    
4702
    if not self.op.nodes:
4703
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4704
    else:
4705
      self.needed_locks[locking.LEVEL_NODE] = \
4706
        _GetWantedNodes(self, self.op.nodes)
4707

    
4708
  def Exec(self, feedback_fn):
4709
    """Computes the list of nodes and their attributes.
4710

4711
    """
4712
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4713
    volumes = self.rpc.call_node_volumes(nodenames)
4714

    
4715
    ilist = self.cfg.GetAllInstancesInfo()
4716
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4717

    
4718
    output = []
4719
    for node in nodenames:
4720
      nresult = volumes[node]
4721
      if nresult.offline:
4722
        continue
4723
      msg = nresult.fail_msg
4724
      if msg:
4725
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4726
        continue
4727

    
4728
      node_vols = sorted(nresult.payload,
4729
                         key=operator.itemgetter("dev"))
4730

    
4731
      for vol in node_vols:
4732
        node_output = []
4733
        for field in self.op.output_fields:
4734
          if field == "node":
4735
            val = node
4736
          elif field == "phys":
4737
            val = vol["dev"]
4738
          elif field == "vg":
4739
            val = vol["vg"]
4740
          elif field == "name":
4741
            val = vol["name"]
4742
          elif field == "size":
4743
            val = int(float(vol["size"]))
4744
          elif field == "instance":
4745
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4746
          else:
4747
            raise errors.ParameterError(field)
4748
          node_output.append(str(val))
4749

    
4750
        output.append(node_output)
4751

    
4752
    return output
4753

    
4754

    
4755
class LUNodeQueryStorage(NoHooksLU):
4756
  """Logical unit for getting information on storage units on node(s).
4757

4758
  """
4759
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4760
  REQ_BGL = False
4761

    
4762
  def CheckArguments(self):
4763
    _CheckOutputFields(static=self._FIELDS_STATIC,
4764
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4765
                       selected=self.op.output_fields)
4766

    
4767
  def ExpandNames(self):
4768
    self.share_locks = _ShareAll()
4769
    self.needed_locks = {}
4770

    
4771
    if self.op.nodes:
4772
      self.needed_locks[locking.LEVEL_NODE] = \
4773
        _GetWantedNodes(self, self.op.nodes)
4774
    else:
4775
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4776

    
4777
  def Exec(self, feedback_fn):
4778
    """Computes the list of nodes and their attributes.
4779

4780
    """
4781
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4782

    
4783
    # Always get name to sort by
4784
    if constants.SF_NAME in self.op.output_fields:
4785
      fields = self.op.output_fields[:]
4786
    else:
4787
      fields = [constants.SF_NAME] + self.op.output_fields
4788

    
4789
    # Never ask for node or type as it's only known to the LU
4790
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4791
      while extra in fields:
4792
        fields.remove(extra)
4793

    
4794
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4795
    name_idx = field_idx[constants.SF_NAME]
4796

    
4797
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4798
    data = self.rpc.call_storage_list(self.nodes,
4799
                                      self.op.storage_type, st_args,
4800
                                      self.op.name, fields)
4801

    
4802
    result = []
4803

    
4804
    for node in utils.NiceSort(self.nodes):
4805
      nresult = data[node]
4806
      if nresult.offline:
4807
        continue
4808

    
4809
      msg = nresult.fail_msg
4810
      if msg:
4811
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4812
        continue
4813

    
4814
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4815

    
4816
      for name in utils.NiceSort(rows.keys()):
4817
        row = rows[name]
4818

    
4819
        out = []
4820

    
4821
        for field in self.op.output_fields:
4822
          if field == constants.SF_NODE:
4823
            val = node
4824
          elif field == constants.SF_TYPE:
4825
            val = self.op.storage_type
4826
          elif field in field_idx:
4827
            val = row[field_idx[field]]
4828
          else:
4829
            raise errors.ParameterError(field)
4830

    
4831
          out.append(val)
4832

    
4833
        result.append(out)
4834

    
4835
    return result
4836

    
4837

    
4838
class _InstanceQuery(_QueryBase):
4839
  FIELDS = query.INSTANCE_FIELDS
4840

    
4841
  def ExpandNames(self, lu):
4842
    lu.needed_locks = {}
4843
    lu.share_locks = _ShareAll()
4844

    
4845
    if self.names:
4846
      self.wanted = _GetWantedInstances(lu, self.names)
4847
    else:
4848
      self.wanted = locking.ALL_SET
4849

    
4850
    self.do_locking = (self.use_locking and
4851
                       query.IQ_LIVE in self.requested_data)
4852
    if self.do_locking:
4853
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4854
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4855
      lu.needed_locks[locking.LEVEL_NODE] = []
4856
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4857

    
4858
    self.do_grouplocks = (self.do_locking and
4859
                          query.IQ_NODES in self.requested_data)
4860

    
4861
  def DeclareLocks(self, lu, level):
4862
    if self.do_locking:
4863
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4864
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4865

    
4866
        # Lock all groups used by instances optimistically; this requires going
4867
        # via the node before it's locked, requiring verification later on
4868
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4869
          set(group_uuid
4870
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4871
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4872
      elif level == locking.LEVEL_NODE:
4873
        lu._LockInstancesNodes() # pylint: disable=W0212
4874

    
4875
  @staticmethod
4876
  def _CheckGroupLocks(lu):
4877
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4878
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4879

    
4880
    # Check if node groups for locked instances are still correct
4881
    for instance_name in owned_instances:
4882
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4883

    
4884
  def _GetQueryData(self, lu):
4885
    """Computes the list of instances and their attributes.
4886

4887
    """
4888
    if self.do_grouplocks:
4889
      self._CheckGroupLocks(lu)
4890

    
4891
    cluster = lu.cfg.GetClusterInfo()
4892
    all_info = lu.cfg.GetAllInstancesInfo()
4893

    
4894
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4895

    
4896
    instance_list = [all_info[name] for name in instance_names]
4897
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4898
                                        for inst in instance_list)))
4899
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4900
    bad_nodes = []
4901
    offline_nodes = []
4902
    wrongnode_inst = set()
4903

    
4904
    # Gather data as requested
4905
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4906
      live_data = {}
4907
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4908
      for name in nodes:
4909
        result = node_data[name]
4910
        if result.offline:
4911
          # offline nodes will be in both lists
4912
          assert result.fail_msg
4913
          offline_nodes.append(name)
4914
        if result.fail_msg:
4915
          bad_nodes.append(name)
4916
        elif result.payload:
4917
          for inst in result.payload:
4918
            if inst in all_info:
4919
              if all_info[inst].primary_node == name:
4920
                live_data.update(result.payload)
4921
              else:
4922
                wrongnode_inst.add(inst)
4923
            else:
4924
              # orphan instance; we don't list it here as we don't
4925
              # handle this case yet in the output of instance listing
4926
              logging.warning("Orphan instance '%s' found on node %s",
4927
                              inst, name)
4928
        # else no instance is alive
4929
    else:
4930
      live_data = {}
4931

    
4932
    if query.IQ_DISKUSAGE in self.requested_data:
4933
      disk_usage = dict((inst.name,
4934
                         _ComputeDiskSize(inst.disk_template,
4935
                                          [{constants.IDISK_SIZE: disk.size}
4936
                                           for disk in inst.disks]))
4937
                        for inst in instance_list)
4938
    else:
4939
      disk_usage = None
4940

    
4941
    if query.IQ_CONSOLE in self.requested_data:
4942
      consinfo = {}
4943
      for inst in instance_list:
4944
        if inst.name in live_data:
4945
          # Instance is running
4946
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4947
        else:
4948
          consinfo[inst.name] = None
4949
      assert set(consinfo.keys()) == set(instance_names)
4950
    else:
4951
      consinfo = None
4952

    
4953
    if query.IQ_NODES in self.requested_data:
4954
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4955
                                            instance_list)))
4956
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4957
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4958
                    for uuid in set(map(operator.attrgetter("group"),
4959
                                        nodes.values())))
4960
    else:
4961
      nodes = None
4962
      groups = None
4963

    
4964
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4965
                                   disk_usage, offline_nodes, bad_nodes,
4966
                                   live_data, wrongnode_inst, consinfo,
4967
                                   nodes, groups)
4968

    
4969

    
4970
class LUQuery(NoHooksLU):
4971
  """Query for resources/items of a certain kind.
4972

4973
  """
4974
  # pylint: disable=W0142
4975
  REQ_BGL = False
4976

    
4977
  def CheckArguments(self):
4978
    qcls = _GetQueryImplementation(self.op.what)
4979

    
4980
    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4981

    
4982
  def ExpandNames(self):
4983
    self.impl.ExpandNames(self)
4984

    
4985
  def DeclareLocks(self, level):
4986
    self.impl.DeclareLocks(self, level)
4987

    
4988
  def Exec(self, feedback_fn):
4989
    return self.impl.NewStyleQuery(self)
4990

    
4991

    
4992
class LUQueryFields(NoHooksLU):
4993
  """Query for resources/items of a certain kind.
4994

4995
  """
4996
  # pylint: disable=W0142
4997
  REQ_BGL = False
4998

    
4999
  def CheckArguments(self):
5000
    self.qcls = _GetQueryImplementation(self.op.what)
5001

    
5002
  def ExpandNames(self):
5003
    self.needed_locks = {}
5004

    
5005
  def Exec(self, feedback_fn):
5006
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5007

    
5008

    
5009
class LUNodeModifyStorage(NoHooksLU):
5010
  """Logical unit for modifying a storage volume on a node.
5011

5012
  """
5013
  REQ_BGL = False
5014

    
5015
  def CheckArguments(self):
5016
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5017

    
5018
    storage_type = self.op.storage_type
5019

    
5020
    try:
5021
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5022
    except KeyError:
5023
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
5024
                                 " modified" % storage_type,
5025
                                 errors.ECODE_INVAL)
5026

    
5027
    diff = set(self.op.changes.keys()) - modifiable
5028
    if diff:
5029
      raise errors.OpPrereqError("The following fields can not be modified for"
5030
                                 " storage units of type '%s': %r" %
5031
                                 (storage_type, list(diff)),
5032
                                 errors.ECODE_INVAL)
5033

    
5034
  def ExpandNames(self):
5035
    self.needed_locks = {
5036
      locking.LEVEL_NODE: self.op.node_name,
5037
      }
5038

    
5039
  def Exec(self, feedback_fn):
5040
    """Computes the list of nodes and their attributes.
5041

5042
    """
5043
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5044
    result = self.rpc.call_storage_modify(self.op.node_name,
5045
                                          self.op.storage_type, st_args,
5046
                                          self.op.name, self.op.changes)
5047
    result.Raise("Failed to modify storage unit '%s' on %s" %
5048
                 (self.op.name, self.op.node_name))
5049

    
5050

    
5051
class LUNodeAdd(LogicalUnit):
5052
  """Logical unit for adding node to the cluster.
5053

5054
  """
5055
  HPATH = "node-add"
5056
  HTYPE = constants.HTYPE_NODE
5057
  _NFLAGS = ["master_capable", "vm_capable"]
5058

    
5059
  def CheckArguments(self):
5060
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5061
    # validate/normalize the node name
5062
    self.hostname = netutils.GetHostname(name=self.op.node_name,
5063
                                         family=self.primary_ip_family)
5064
    self.op.node_name = self.hostname.name
5065

    
5066
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5067
      raise errors.OpPrereqError("Cannot readd the master node",
5068
                                 errors.ECODE_STATE)
5069

    
5070
    if self.op.readd and self.op.group:
5071
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
5072
                                 " being readded", errors.ECODE_INVAL)
5073

    
5074
  def BuildHooksEnv(self):
5075
    """Build hooks env.
5076

5077
    This will run on all nodes before, and on all nodes + the new node after.
5078

5079
    """
5080
    return {
5081
      "OP_TARGET": self.op.node_name,
5082
      "NODE_NAME": self.op.node_name,
5083
      "NODE_PIP": self.op.primary_ip,
5084
      "NODE_SIP": self.op.secondary_ip,
5085
      "MASTER_CAPABLE": str(self.op.master_capable),
5086
      "VM_CAPABLE": str(self.op.vm_capable),
5087
      }
5088

    
5089
  def BuildHooksNodes(self):
5090
    """Build hooks nodes.
5091

5092
    """
5093
    # Exclude added node
5094
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5095
    post_nodes = pre_nodes + [self.op.node_name, ]
5096

    
5097
    return (pre_nodes, post_nodes)
5098

    
5099
  def CheckPrereq(self):
5100
    """Check prerequisites.
5101

5102
    This checks:
5103
     - the new node is not already in the config
5104
     - it is resolvable
5105
     - its parameters (single/dual homed) matches the cluster
5106

5107
    Any errors are signaled by raising errors.OpPrereqError.
5108

5109
    """
5110
    cfg = self.cfg
5111
    hostname = self.hostname
5112
    node = hostname.name
5113
    primary_ip = self.op.primary_ip = hostname.ip
5114
    if self.op.secondary_ip is None:
5115
      if self.primary_ip_family == netutils.IP6Address.family:
5116
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5117
                                   " IPv4 address must be given as secondary",
5118
                                   errors.ECODE_INVAL)
5119
      self.op.secondary_ip = primary_ip
5120

    
5121
    secondary_ip = self.op.secondary_ip
5122
    if not netutils.IP4Address.IsValid(secondary_ip):
5123
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5124
                                 " address" % secondary_ip, errors.ECODE_INVAL)
5125

    
5126
    node_list = cfg.GetNodeList()
5127
    if not self.op.readd and node in node_list:
5128
      raise errors.OpPrereqError("Node %s is already in the configuration" %
5129
                                 node, errors.ECODE_EXISTS)
5130
    elif self.op.readd and node not in node_list:
5131
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5132
                                 errors.ECODE_NOENT)
5133

    
5134
    self.changed_primary_ip = False
5135

    
5136
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5137
      if self.op.readd and node == existing_node_name:
5138
        if existing_node.secondary_ip != secondary_ip:
5139
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
5140
                                     " address configuration as before",
5141
                                     errors.ECODE_INVAL)
5142
        if existing_node.primary_ip != primary_ip:
5143
          self.changed_primary_ip = True
5144

    
5145
        continue
5146

    
5147
      if (existing_node.primary_ip == primary_ip or
5148
          existing_node.secondary_ip == primary_ip or
5149
          existing_node.primary_ip == secondary_ip or
5150
          existing_node.secondary_ip == secondary_ip):
5151
        raise errors.OpPrereqError("New node ip address(es) conflict with"
5152
                                   " existing node %s" % existing_node.name,
5153
                                   errors.ECODE_NOTUNIQUE)
5154

    
5155
    # After this 'if' block, None is no longer a valid value for the
5156
    # _capable op attributes
5157
    if self.op.readd:
5158
      old_node = self.cfg.GetNodeInfo(node)
5159
      assert old_node is not None, "Can't retrieve locked node %s" % node
5160
      for attr in self._NFLAGS:
5161
        if getattr(self.op, attr) is None:
5162
          setattr(self.op, attr, getattr(old_node, attr))
5163
    else:
5164
      for attr in self._NFLAGS:
5165
        if getattr(self.op, attr) is None:
5166
          setattr(self.op, attr, True)
5167

    
5168
    if self.op.readd and not self.op.vm_capable:
5169
      pri, sec = cfg.GetNodeInstances(node)
5170
      if pri or sec:
5171
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5172
                                   " flag set to false, but it already holds"
5173
                                   " instances" % node,
5174
                                   errors.ECODE_STATE)
5175

    
5176
    # check that the type of the node (single versus dual homed) is the
5177
    # same as for the master
5178
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5179
    master_singlehomed = myself.secondary_ip == myself.primary_ip
5180
    newbie_singlehomed = secondary_ip == primary_ip
5181
    if master_singlehomed != newbie_singlehomed:
5182
      if master_singlehomed:
5183
        raise errors.OpPrereqError("The master has no secondary ip but the"
5184
                                   " new node has one",
5185
                                   errors.ECODE_INVAL)
5186
      else:
5187
        raise errors.OpPrereqError("The master has a secondary ip but the"
5188
                                   " new node doesn't have one",
5189
                                   errors.ECODE_INVAL)
5190

    
5191
    # checks reachability
5192
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5193
      raise errors.OpPrereqError("Node not reachable by ping",
5194
                                 errors.ECODE_ENVIRON)
5195

    
5196
    if not newbie_singlehomed:
5197
      # check reachability from my secondary ip to newbie's secondary ip
5198
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5199
                           source=myself.secondary_ip):
5200
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5201
                                   " based ping to node daemon port",
5202
                                   errors.ECODE_ENVIRON)
5203

    
5204
    if self.op.readd:
5205
      exceptions = [node]
5206
    else:
5207
      exceptions = []
5208

    
5209
    if self.op.master_capable:
5210
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5211
    else:
5212
      self.master_candidate = False
5213

    
5214
    if self.op.readd:
5215
      self.new_node = old_node
5216
    else:
5217
      node_group = cfg.LookupNodeGroup(self.op.group)
5218
      self.new_node = objects.Node(name=node,
5219
                                   primary_ip=primary_ip,
5220
                                   secondary_ip=secondary_ip,
5221
                                   master_candidate=self.master_candidate,
5222
                                   offline=False, drained=False,
5223
                                   group=node_group)
5224

    
5225
    if self.op.ndparams:
5226
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5227

    
5228
  def Exec(self, feedback_fn):
5229
    """Adds the new node to the cluster.
5230

5231
    """
5232
    new_node = self.new_node
5233
    node = new_node.name
5234

    
5235
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5236
      "Not owning BGL"
5237

    
5238
    # We adding a new node so we assume it's powered
5239
    new_node.powered = True
5240

    
5241
    # for re-adds, reset the offline/drained/master-candidate flags;
5242
    # we need to reset here, otherwise offline would prevent RPC calls
5243
    # later in the procedure; this also means that if the re-add
5244
    # fails, we are left with a non-offlined, broken node
5245
    if self.op.readd:
5246
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5247
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5248
      # if we demote the node, we do cleanup later in the procedure
5249
      new_node.master_candidate = self.master_candidate
5250
      if self.changed_primary_ip:
5251
        new_node.primary_ip = self.op.primary_ip
5252

    
5253
    # copy the master/vm_capable flags
5254
    for attr in self._NFLAGS:
5255
      setattr(new_node, attr, getattr(self.op, attr))
5256

    
5257
    # notify the user about any possible mc promotion
5258
    if new_node.master_candidate:
5259
      self.LogInfo("Node will be a master candidate")
5260

    
5261
    if self.op.ndparams:
5262
      new_node.ndparams = self.op.ndparams
5263
    else:
5264
      new_node.ndparams = {}
5265

    
5266
    # check connectivity
5267
    result = self.rpc.call_version([node])[node]
5268
    result.Raise("Can't get version information from node %s" % node)
5269
    if constants.PROTOCOL_VERSION == result.payload:
5270
      logging.info("Communication to node %s fine, sw version %s match",
5271
                   node, result.payload)
5272
    else:
5273
      raise errors.OpExecError("Version mismatch master version %s,"
5274
                               " node version %s" %
5275
                               (constants.PROTOCOL_VERSION, result.payload))
5276

    
5277
    # Add node to our /etc/hosts, and add key to known_hosts
5278
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5279
      master_node = self.cfg.GetMasterNode()
5280
      result = self.rpc.call_etc_hosts_modify(master_node,
5281
                                              constants.ETC_HOSTS_ADD,
5282
                                              self.hostname.name,
5283
                                              self.hostname.ip)
5284
      result.Raise("Can't update hosts file with new host data")
5285

    
5286
    if new_node.secondary_ip != new_node.primary_ip:
5287
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5288
                               False)
5289

    
5290
    node_verify_list = [self.cfg.GetMasterNode()]
5291
    node_verify_param = {
5292
      constants.NV_NODELIST: ([node], {}),
5293
      # TODO: do a node-net-test as well?
5294
    }
5295

    
5296
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5297
                                       self.cfg.GetClusterName())
5298
    for verifier in node_verify_list:
5299
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5300
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5301
      if nl_payload:
5302
        for failed in nl_payload:
5303
          feedback_fn("ssh/hostname verification failed"
5304
                      " (checking from %s): %s" %
5305
                      (verifier, nl_payload[failed]))
5306
        raise errors.OpExecError("ssh/hostname verification failed")
5307

    
5308
    if self.op.readd:
5309
      _RedistributeAncillaryFiles(self)
5310
      self.context.ReaddNode(new_node)
5311
      # make sure we redistribute the config
5312
      self.cfg.Update(new_node, feedback_fn)
5313
      # and make sure the new node will not have old files around
5314
      if not new_node.master_candidate:
5315
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5316
        msg = result.fail_msg
5317
        if msg:
5318
          self.LogWarning("Node failed to demote itself from master"
5319
                          " candidate status: %s" % msg)
5320
    else:
5321
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5322
                                  additional_vm=self.op.vm_capable)
5323
      self.context.AddNode(new_node, self.proc.GetECId())
5324

    
5325

    
5326
class LUNodeSetParams(LogicalUnit):
5327
  """Modifies the parameters of a node.
5328

5329
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5330
      to the node role (as _ROLE_*)
5331
  @cvar _R2F: a dictionary from node role to tuples of flags
5332
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5333

5334
  """
5335
  HPATH = "node-modify"
5336
  HTYPE = constants.HTYPE_NODE
5337
  REQ_BGL = False
5338
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5339
  _F2R = {
5340
    (True, False, False): _ROLE_CANDIDATE,
5341
    (False, True, False): _ROLE_DRAINED,
5342
    (False, False, True): _ROLE_OFFLINE,
5343
    (False, False, False): _ROLE_REGULAR,
5344
    }
5345
  _R2F = dict((v, k) for k, v in _F2R.items())
5346
  _FLAGS = ["master_candidate", "drained", "offline"]
5347

    
5348
  def CheckArguments(self):
5349
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5350
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5351
                self.op.master_capable, self.op.vm_capable,
5352
                self.op.secondary_ip, self.op.ndparams]
5353
    if all_mods.count(None) == len(all_mods):
5354
      raise errors.OpPrereqError("Please pass at least one modification",
5355
                                 errors.ECODE_INVAL)
5356
    if all_mods.count(True) > 1:
5357
      raise errors.OpPrereqError("Can't set the node into more than one"
5358
                                 " state at the same time",
5359
                                 errors.ECODE_INVAL)
5360

    
5361
    # Boolean value that tells us whether we might be demoting from MC
5362
    self.might_demote = (self.op.master_candidate == False or
5363
                         self.op.offline == True or
5364
                         self.op.drained == True or
5365
                         self.op.master_capable == False)
5366

    
5367
    if self.op.secondary_ip:
5368
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5369
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5370
                                   " address" % self.op.secondary_ip,
5371
                                   errors.ECODE_INVAL)
5372

    
5373
    self.lock_all = self.op.auto_promote and self.might_demote
5374
    self.lock_instances = self.op.secondary_ip is not None
5375

    
5376
  def _InstanceFilter(self, instance):
5377
    """Filter for getting affected instances.
5378

5379
    """
5380
    return (instance.disk_template in constants.DTS_INT_MIRROR and
5381
            self.op.node_name in instance.all_nodes)
5382

    
5383
  def ExpandNames(self):
5384
    if self.lock_all:
5385
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5386
    else:
5387
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5388

    
5389
    # Since modifying a node can have severe effects on currently running
5390
    # operations the resource lock is at least acquired in shared mode
5391
    self.needed_locks[locking.LEVEL_NODE_RES] = \
5392
      self.needed_locks[locking.LEVEL_NODE]
5393

    
5394
    # Get node resource and instance locks in shared mode; they are not used
5395
    # for anything but read-only access
5396
    self.share_locks[locking.LEVEL_NODE_RES] = 1
5397
    self.share_locks[locking.LEVEL_INSTANCE] = 1
5398

    
5399
    if self.lock_instances:
5400
      self.needed_locks[locking.LEVEL_INSTANCE] = \
5401
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5402

    
5403
  def BuildHooksEnv(self):
5404
    """Build hooks env.
5405

5406
    This runs on the master node.
5407

5408
    """
5409
    return {
5410
      "OP_TARGET": self.op.node_name,
5411
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5412
      "OFFLINE": str(self.op.offline),
5413
      "DRAINED": str(self.op.drained),
5414
      "MASTER_CAPABLE": str(self.op.master_capable),
5415
      "VM_CAPABLE": str(self.op.vm_capable),
5416
      }
5417

    
5418
  def BuildHooksNodes(self):
5419
    """Build hooks nodes.
5420

5421
    """
5422
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5423
    return (nl, nl)
5424

    
5425
  def CheckPrereq(self):
5426
    """Check prerequisites.
5427

5428
    This only checks the instance list against the existing names.
5429

5430
    """
5431
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5432

    
5433
    if self.lock_instances:
5434
      affected_instances = \
5435
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5436

    
5437
      # Verify instance locks
5438
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5439
      wanted_instances = frozenset(affected_instances.keys())
5440
      if wanted_instances - owned_instances:
5441
        raise errors.OpPrereqError("Instances affected by changing node %s's"
5442
                                   " secondary IP address have changed since"
5443
                                   " locks were acquired, wanted '%s', have"
5444
                                   " '%s'; retry the operation" %
5445
                                   (self.op.node_name,
5446
                                    utils.CommaJoin(wanted_instances),
5447
                                    utils.CommaJoin(owned_instances)),
5448
                                   errors.ECODE_STATE)
5449
    else:
5450
      affected_instances = None
5451

    
5452
    if (self.op.master_candidate is not None or
5453
        self.op.drained is not None or
5454
        self.op.offline is not None):
5455
      # we can't change the master's node flags
5456
      if self.op.node_name == self.cfg.GetMasterNode():
5457
        raise errors.OpPrereqError("The master role can be changed"
5458
                                   " only via master-failover",
5459
                                   errors.ECODE_INVAL)
5460

    
5461
    if self.op.master_candidate and not node.master_capable:
5462
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5463
                                 " it a master candidate" % node.name,
5464
                                 errors.ECODE_STATE)
5465

    
5466
    if self.op.vm_capable == False:
5467
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5468
      if ipri or isec:
5469
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5470
                                   " the vm_capable flag" % node.name,
5471
                                   errors.ECODE_STATE)
5472

    
5473
    if node.master_candidate and self.might_demote and not self.lock_all:
5474
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5475
      # check if after removing the current node, we're missing master
5476
      # candidates
5477
      (mc_remaining, mc_should, _) = \
5478
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5479
      if mc_remaining < mc_should:
5480
        raise errors.OpPrereqError("Not enough master candidates, please"
5481
                                   " pass auto promote option to allow"
5482
                                   " promotion", errors.ECODE_STATE)
5483

    
5484
    self.old_flags = old_flags = (node.master_candidate,
5485
                                  node.drained, node.offline)
5486
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5487
    self.old_role = old_role = self._F2R[old_flags]
5488

    
5489
    # Check for ineffective changes
5490
    for attr in self._FLAGS:
5491
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5492
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      # TODO: Use standard RPC runner, but make sure it works when the node is
      # still marked offline
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          raise errors.OpPrereqError("Cannot change secondary IP address:"
                                     " offline node has instances (%s)"
                                     " configured to use it" %
                                     utils.CommaJoin(affected_instances.keys()))
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
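      # note: the fourth argument of the assemble RPC toggles primary mode
      # (False for this first, secondary-mode pass; True in the second pass)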
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
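    # callers that pass force=None (e.g. reinstall and rename) never get the
    # retry hint below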
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Errors on the primary node make the function return False unless
  C{ignore_primary} is true; errors on secondary nodes are ignored
  only when the node is marked offline.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  (_, _, (hv_info, )) = nodeinfo[node].payload

  free_mem = hv_info.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, (vg_info, ), _) = info.payload
    vg_free = vg_info.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs.

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has fewer CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, _, (hv_info, )) = info.payload
    num_cpus = hv_info.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MAXMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

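    # unless the caller asked not to remember this change (no_remember), mark
    # the instance as up in the configuration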
    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = \
        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                      self.op.beparams),
                                     self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

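    # soft and hard reboots of a running instance are delegated to the node
    # via call_instance_reboot; a full reboot (or rebooting a stopped
    # instance) is emulated below by a shutdown followed by a fresh start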
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node,
                                             (inst, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # normalise the disk list
    self.op.disks = sorted(frozenset(self.op.disks))

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    mods = [] # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes and minors
          assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6 # otherwise disk internals
                                           # have changed
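          # the DRBD8 logical_id is a 6-tuple
          # (nodeA, nodeB, port, minorA, minorB, secret); the port and secret
          # are kept, while minors are re-allocated on the new node pair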
          (_, _, old_port, _, _, old_secret) = disk.logical_id
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                    new_minors[0], new_minors[1], old_secret)
          assert len(disk.logical_id) == len(new_id)
          mods.append((idx, new_id))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id in mods:
      instance.disks[idx].logical_id = new_id

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname.name != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (hostname.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(instance.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
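      # externally-mirrored disks can be failed over to any node; when no
      # target node is given the choice happens later (e.g. via an
      # iallocator), so all node locks are needed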
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
6996
  """Migrate an instance.
6997

6998
  This is migration without shutting down, compared to the failover,
6999
  which is done with shutdown.
7000

7001
  """
7002
  HPATH = "instance-migrate"
7003
  HTYPE = constants.HTYPE_INSTANCE
7004
  REQ_BGL = False
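  # Illustrative sketch, not taken from the original sources: the opcode that
  # drives this LU carries at least the parameters referenced below, roughly
  #   opcodes.OpInstanceMigrate(instance_name="inst1.example.com",
  #                             cleanup=False, allow_failover=False,
  #                             target_node=None, iallocator=None,
  #                             live=None, mode=None)
  # where "inst1.example.com" is a made-up name; live/mode resolution happens
  # later, in TLMigrateInstance.CheckPrereq.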
7005

    
7006
  def ExpandNames(self):
7007
    self._ExpandAndLockInstance()
7008

    
7009
    if self.op.target_node is not None:
7010
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7011

    
7012
    self.needed_locks[locking.LEVEL_NODE] = []
7013
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7014

    
7015
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
7016
                                       cleanup=self.op.cleanup,
7017
                                       failover=False,
7018
                                       fallback=self.op.allow_failover)
7019
    self.tasklets = [self._migrater]
7020

    
7021
  def DeclareLocks(self, level):
7022
    if level == locking.LEVEL_NODE:
7023
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7024
      if instance.disk_template in constants.DTS_EXT_MIRROR:
7025
        if self.op.target_node is None:
7026
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7027
        else:
7028
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7029
                                                   self.op.target_node]
7030
        del self.recalculate_locks[locking.LEVEL_NODE]
7031
      else:
7032
        self._LockInstancesNodes()
7033

    
7034
  def BuildHooksEnv(self):
7035
    """Build hooks env.
7036

7037
    This runs on master, primary and secondary nodes of the instance.
7038

7039
    """
7040
    instance = self._migrater.instance
7041
    source_node = instance.primary_node
7042
    target_node = self.op.target_node
7043
    env = _BuildInstanceHookEnvByObject(self, instance)
7044
    env.update({
7045
      "MIGRATE_LIVE": self._migrater.live,
7046
      "MIGRATE_CLEANUP": self.op.cleanup,
7047
      "OLD_PRIMARY": source_node,
7048
      "NEW_PRIMARY": target_node,
7049
      })
7050

    
7051
    if instance.disk_template in constants.DTS_INT_MIRROR:
7052
      env["OLD_SECONDARY"] = target_node
7053
      env["NEW_SECONDARY"] = source_node
7054
    else:
7055
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7056

    
7057
    return env
7058

    
7059
  def BuildHooksNodes(self):
7060
    """Build hooks nodes.
7061

7062
    """
7063
    instance = self._migrater.instance
7064
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7065
    return (nl, nl + [instance.primary_node])
7066

    
7067

    
7068
class LUInstanceMove(LogicalUnit):
7069
  """Move an instance by data-copying.
7070

7071
  """
7072
  HPATH = "instance-move"
7073
  HTYPE = constants.HTYPE_INSTANCE
7074
  REQ_BGL = False
7075

    
7076
  def ExpandNames(self):
7077
    self._ExpandAndLockInstance()
7078
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7079
    self.op.target_node = target_node
7080
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
7081
    self.needed_locks[locking.LEVEL_NODE_RES] = []
7082
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7083

    
7084
  def DeclareLocks(self, level):
7085
    if level == locking.LEVEL_NODE:
7086
      self._LockInstancesNodes(primary_only=True)
7087
    elif level == locking.LEVEL_NODE_RES:
7088
      # Copy node locks
7089
      self.needed_locks[locking.LEVEL_NODE_RES] = \
7090
        self.needed_locks[locking.LEVEL_NODE][:]
7091

    
7092
  def BuildHooksEnv(self):
7093
    """Build hooks env.
7094

7095
    This runs on master, primary and secondary nodes of the instance.
7096

7097
    """
7098
    env = {
7099
      "TARGET_NODE": self.op.target_node,
7100
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7101
      }
7102
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7103
    return env
7104

    
7105
  def BuildHooksNodes(self):
7106
    """Build hooks nodes.
7107

7108
    """
7109
    nl = [
7110
      self.cfg.GetMasterNode(),
7111
      self.instance.primary_node,
7112
      self.op.target_node,
7113
      ]
7114
    return (nl, nl)
7115

    
7116
  def CheckPrereq(self):
7117
    """Check prerequisites.
7118

7119
    This checks that the instance is in the cluster.
7120

7121
    """
7122
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7123
    assert self.instance is not None, \
7124
      "Cannot retrieve locked instance %s" % self.op.instance_name
7125

    
7126
    node = self.cfg.GetNodeInfo(self.op.target_node)
7127
    assert node is not None, \
7128
      "Cannot retrieve locked node %s" % self.op.target_node
7129

    
7130
    self.target_node = target_node = node.name
7131

    
7132
    if target_node == instance.primary_node:
7133
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
7134
                                 (instance.name, target_node),
7135
                                 errors.ECODE_STATE)
7136

    
7137
    bep = self.cfg.GetClusterInfo().FillBE(instance)
7138

    
7139
    for idx, dsk in enumerate(instance.disks):
7140
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7141
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7142
                                   " cannot copy" % idx, errors.ECODE_STATE)
7143

    
7144
    _CheckNodeOnline(self, target_node)
7145
    _CheckNodeNotDrained(self, target_node)
7146
    _CheckNodeVmCapable(self, target_node)
7147

    
7148
    if instance.admin_state == constants.ADMINST_UP:
7149
      # check memory requirements on the secondary node
7150
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7151
                           instance.name, bep[constants.BE_MAXMEM],
7152
                           instance.hypervisor)
7153
    else:
7154
      self.LogInfo("Not checking memory on the secondary node as"
7155
                   " instance will not be started")
7156

    
7157
    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)
7159

    
7160
  def Exec(self, feedback_fn):
7161
    """Move an instance.
7162

7163
    The move is done by shutting it down on its present node, copying
7164
    the data over (slow) and starting it on the new node.
7165

7166
    """
7167
    instance = self.instance
7168

    
7169
    source_node = instance.primary_node
7170
    target_node = self.target_node
7171

    
7172
    self.LogInfo("Shutting down instance %s on source node %s",
7173
                 instance.name, source_node)
7174

    
7175
    assert (self.owned_locks(locking.LEVEL_NODE) ==
7176
            self.owned_locks(locking.LEVEL_NODE_RES))
7177

    
7178
    result = self.rpc.call_instance_shutdown(source_node, instance,
7179
                                             self.op.shutdown_timeout)
7180
    msg = result.fail_msg
7181
    if msg:
7182
      if self.op.ignore_consistency:
7183
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
7184
                             " Proceeding anyway. Please make sure node"
7185
                             " %s is down. Error details: %s",
7186
                             instance.name, source_node, source_node, msg)
7187
      else:
7188
        raise errors.OpExecError("Could not shutdown instance %s on"
7189
                                 " node %s: %s" %
7190
                                 (instance.name, source_node, msg))
7191

    
7192
    # create the target disks
7193
    try:
7194
      _CreateDisks(self, instance, target_node=target_node)
7195
    except errors.OpExecError:
7196
      self.LogWarning("Device creation failed, reverting...")
7197
      try:
7198
        _RemoveDisks(self, instance, target_node=target_node)
7199
      finally:
7200
        self.cfg.ReleaseDRBDMinors(instance.name)
7201
        raise
7202

    
7203
    cluster_name = self.cfg.GetClusterInfo().cluster_name
7204

    
7205
    errs = []
7206
    # activate, get path, copy the data over
7207
    for idx, disk in enumerate(instance.disks):
7208
      self.LogInfo("Copying data for disk %d", idx)
7209
      result = self.rpc.call_blockdev_assemble(target_node, disk,
7210
                                               instance.name, True, idx)
7211
      if result.fail_msg:
7212
        self.LogWarning("Can't assemble newly created disk %d: %s",
7213
                        idx, result.fail_msg)
7214
        errs.append(result.fail_msg)
7215
        break
7216
      dev_path = result.payload
7217
      result = self.rpc.call_blockdev_export(source_node, disk,
7218
                                             target_node, dev_path,
7219
                                             cluster_name)
7220
      if result.fail_msg:
7221
        self.LogWarning("Can't copy data over for disk %d: %s",
7222
                        idx, result.fail_msg)
7223
        errs.append(result.fail_msg)
7224
        break
7225

    
7226
    if errs:
7227
      self.LogWarning("Some disks failed to copy, aborting")
7228
      try:
7229
        _RemoveDisks(self, instance, target_node=target_node)
7230
      finally:
7231
        self.cfg.ReleaseDRBDMinors(instance.name)
7232
        raise errors.OpExecError("Errors during disk copy: %s" %
7233
                                 (",".join(errs),))
7234

    
7235
    instance.primary_node = target_node
7236
    self.cfg.Update(instance, feedback_fn)
7237

    
7238
    self.LogInfo("Removing the disks on the original node")
7239
    _RemoveDisks(self, instance, target_node=source_node)
7240

    
7241
    # Only start the instance if it's marked as up
7242
    if instance.admin_state == constants.ADMINST_UP:
7243
      self.LogInfo("Starting instance %s on node %s",
7244
                   instance.name, target_node)
7245

    
7246
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7247
                                           ignore_secondaries=True)
7248
      if not disks_ok:
7249
        _ShutdownInstanceDisks(self, instance)
7250
        raise errors.OpExecError("Can't activate the instance's disks")
7251

    
7252
      result = self.rpc.call_instance_start(target_node,
7253
                                            (instance, None, None), False)
7254
      msg = result.fail_msg
7255
      if msg:
7256
        _ShutdownInstanceDisks(self, instance)
7257
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7258
                                 (instance.name, target_node, msg))
7259

    
7260

    
7261
class LUNodeMigrate(LogicalUnit):
7262
  """Migrate all instances from a node.
7263

7264
  """
7265
  HPATH = "node-migrate"
7266
  HTYPE = constants.HTYPE_NODE
7267
  REQ_BGL = False
7268

    
7269
  def CheckArguments(self):
7270
    pass
7271

    
7272
  def ExpandNames(self):
7273
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7274

    
7275
    self.share_locks = _ShareAll()
7276
    self.needed_locks = {
7277
      locking.LEVEL_NODE: [self.op.node_name],
7278
      }
7279

    
7280
  def BuildHooksEnv(self):
7281
    """Build hooks env.
7282

7283
    This runs on the master, the primary and all the secondaries.
7284

7285
    """
7286
    return {
7287
      "NODE_NAME": self.op.node_name,
7288
      }
7289

    
7290
  def BuildHooksNodes(self):
7291
    """Build hooks nodes.
7292

7293
    """
7294
    nl = [self.cfg.GetMasterNode()]
7295
    return (nl, nl)
7296

    
7297
  def CheckPrereq(self):
7298
    pass
7299

    
7300
  def Exec(self, feedback_fn):
7301
    # Prepare jobs for migration instances
7302
    jobs = [
7303
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7304
                                 mode=self.op.mode,
7305
                                 live=self.op.live,
7306
                                 iallocator=self.op.iallocator,
7307
                                 target_node=self.op.target_node)]
7308
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7309
      ]
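    # Illustrative sketch (made-up instance names): every primary instance of
    # the node becomes its own single-opcode job, e.g.
    #   jobs = [[opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
    #           [opcodes.OpInstanceMigrate(instance_name="inst2", ...)]]
    # so the job queue can schedule the migrations independently.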
7310

    
7311
    # TODO: Run iallocator in this opcode and pass correct placement options to
7312
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7313
    # running the iallocator and the actual migration, a good consistency model
7314
    # will have to be found.
7315

    
7316
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7317
            frozenset([self.op.node_name]))
7318

    
7319
    return ResultWithJobs(jobs)
7320

    
7321

    
7322
class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: whether we clean up from a failed migration
  @type iallocator: string
  @ivar iallocator: the iallocator used to determine the target_node
  @type target_node: string
  @ivar target_node: if given, the target node to reallocate the instance to
  @type failover: boolean
  @ivar failover: whether the operation results in a failover or a migration
  @type fallback: boolean
  @ivar fallback: whether fallback to failover is allowed if migration is not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: whether we should ignore consistency between the
                            source and the target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: in case of failover, the timeout of the shutdown

  """
7346

    
7347
  # Constants
7348
  _MIGRATION_POLL_INTERVAL = 1      # seconds
7349
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7350

    
7351
  def __init__(self, lu, instance_name, cleanup=False,
7352
               failover=False, fallback=False,
7353
               ignore_consistency=False,
7354
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7355
    """Initializes this class.
7356

7357
    """
7358
    Tasklet.__init__(self, lu)
7359

    
7360
    # Parameters
7361
    self.instance_name = instance_name
7362
    self.cleanup = cleanup
7363
    self.live = False # will be overridden later
7364
    self.failover = failover
7365
    self.fallback = fallback
7366
    self.ignore_consistency = ignore_consistency
7367
    self.shutdown_timeout = shutdown_timeout
7368

    
7369
  def CheckPrereq(self):
7370
    """Check prerequisites.
7371

7372
    This checks that the instance is in the cluster.
7373

7374
    """
7375
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7376
    instance = self.cfg.GetInstanceInfo(instance_name)
7377
    assert instance is not None
7378
    self.instance = instance
7379

    
7380
    if (not self.cleanup and
7381
        not instance.admin_state == constants.ADMINST_UP and
7382
        not self.failover and self.fallback):
7383
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7384
                      " switching to failover")
7385
      self.failover = True
7386

    
7387
    if instance.disk_template not in constants.DTS_MIRRORED:
7388
      if self.failover:
7389
        text = "failovers"
7390
      else:
7391
        text = "migrations"
7392
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7393
                                 " %s" % (instance.disk_template, text),
7394
                                 errors.ECODE_STATE)
7395

    
7396
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7397
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7398

    
7399
      if self.lu.op.iallocator:
7400
        self._RunAllocator()
7401
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node
7405

    
7406
      # self.target_node is already populated, either directly or by the
7407
      # iallocator run
7408
      target_node = self.target_node
7409
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_STATE)
7413

    
7414
      if len(self.lu.tasklets) == 1:
7415
        # It is safe to release locks only when we're the only tasklet
7416
        # in the LU
7417
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7418
                      keep=[instance.primary_node, self.target_node])
7419

    
7420
    else:
7421
      secondary_nodes = instance.secondary_nodes
7422
      if not secondary_nodes:
7423
        raise errors.ConfigurationError("No secondary node but using"
7424
                                        " %s disk template" %
7425
                                        instance.disk_template)
7426
      target_node = secondary_nodes[0]
7427
      if self.lu.op.iallocator or (self.lu.op.target_node and
7428
                                   self.lu.op.target_node != target_node):
7429
        if self.failover:
7430
          text = "failed over"
7431
        else:
7432
          text = "migrated"
7433
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7434
                                   " be %s to arbitrary nodes"
7435
                                   " (neither an iallocator nor a target"
7436
                                   " node can be passed)" %
7437
                                   (instance.disk_template, text),
7438
                                   errors.ECODE_INVAL)
7439

    
7440
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7441

    
7442
    # check memory requirements on the secondary node
7443
    if not self.failover or instance.admin_state == constants.ADMINST_UP:
7444
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7445
                           instance.name, i_be[constants.BE_MAXMEM],
7446
                           instance.hypervisor)
7447
    else:
7448
      self.lu.LogInfo("Not checking memory on the secondary node as"
7449
                      " instance will not be started")
7450

    
7451
    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7453

    
7454
    if not self.cleanup:
7455
      _CheckNodeNotDrained(self.lu, target_node)
7456
      if not self.failover:
7457
        result = self.rpc.call_instance_migratable(instance.primary_node,
7458
                                                   instance)
7459
        if result.fail_msg and self.fallback:
7460
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7461
                          " failover")
7462
          self.failover = True
7463
        else:
7464
          result.Raise("Can't migrate, please use failover",
7465
                       prereq=True, ecode=errors.ECODE_STATE)
7466

    
7467
    assert not (self.failover and self.cleanup)
7468

    
7469
    if not self.failover:
7470
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7471
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7472
                                   " parameters are accepted",
7473
                                   errors.ECODE_INVAL)
7474
      if self.lu.op.live is not None:
7475
        if self.lu.op.live:
7476
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7477
        else:
7478
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7479
        # reset the 'live' parameter to None so that repeated
7480
        # invocations of CheckPrereq do not raise an exception
7481
        self.lu.op.live = None
7482
      elif self.lu.op.mode is None:
7483
        # read the default value from the hypervisor
7484
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7485
                                                skip_globals=False)
7486
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7487

    
7488
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7489
    else:
7490
      # Failover is never live
7491
      self.live = False
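    # Illustrative summary of the mode resolution above (not in the original):
    #   op.live=True,  op.mode unset -> mode=HT_MIGRATION_LIVE,    live=True
    #   op.live=False, op.mode unset -> mode=HT_MIGRATION_NONLIVE, live=False
    #   both unset                   -> mode from the hypervisor's
    #                                   HV_MIGRATION_MODE default
    #   failover                     -> live=False regardless of the above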
7492

    
7493
  def _RunAllocator(self):
7494
    """Run the allocator based on input opcode.
7495

7496
    """
7497
    ial = IAllocator(self.cfg, self.rpc,
7498
                     mode=constants.IALLOCATOR_MODE_RELOC,
7499
                     name=self.instance_name,
7500
                     # TODO See why hail breaks with a single node below
7501
                     relocate_from=[self.instance.primary_node,
7502
                                    self.instance.primary_node],
7503
                     )
7504

    
7505
    ial.Run(self.lu.op.iallocator)
7506

    
7507
    if not ial.success:
7508
      raise errors.OpPrereqError("Can't compute nodes using"
7509
                                 " iallocator '%s': %s" %
7510
                                 (self.lu.op.iallocator, ial.info),
7511
                                 errors.ECODE_NORES)
7512
    if len(ial.result) != ial.required_nodes:
7513
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7514
                                 " of nodes (%s), required %s" %
7515
                                 (self.lu.op.iallocator, len(ial.result),
7516
                                  ial.required_nodes), errors.ECODE_FAULT)
7517
    self.target_node = ial.result[0]
7518
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7519
                 self.instance_name, self.lu.op.iallocator,
7520
                 utils.CommaJoin(ial.result))
7521

    
7522
  def _WaitUntilSync(self):
7523
    """Poll with custom rpc for disk sync.
7524

7525
    This uses our own step-based rpc call.
7526

7527
    """
7528
    self.feedback_fn("* wait until resync is done")
7529
    all_done = False
7530
    while not all_done:
7531
      all_done = True
7532
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7533
                                            self.nodes_ip,
7534
                                            self.instance.disks)
7535
      min_percent = 100
7536
      for node, nres in result.items():
7537
        nres.Raise("Cannot resync disks on node %s" % node)
7538
        node_done, node_percent = nres.payload
7539
        all_done = all_done and node_done
7540
        if node_percent is not None:
7541
          min_percent = min(min_percent, node_percent)
7542
      if not all_done:
7543
        if min_percent < 100:
7544
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7545
        time.sleep(2)
7546

    
7547
  def _EnsureSecondary(self, node):
7548
    """Demote a node to secondary.
7549

7550
    """
7551
    self.feedback_fn("* switching node %s to secondary mode" % node)
7552

    
7553
    for dev in self.instance.disks:
7554
      self.cfg.SetDiskID(dev, node)
7555

    
7556
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7557
                                          self.instance.disks)
7558
    result.Raise("Cannot change disk to secondary on node %s" % node)
7559

    
7560
  def _GoStandalone(self):
7561
    """Disconnect from the network.
7562

7563
    """
7564
    self.feedback_fn("* changing into standalone mode")
7565
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7566
                                               self.instance.disks)
7567
    for node, nres in result.items():
7568
      nres.Raise("Cannot disconnect disks node %s" % node)
7569

    
7570
  def _GoReconnect(self, multimaster):
7571
    """Reconnect to the network.
7572

7573
    """
7574
    if multimaster:
7575
      msg = "dual-master"
7576
    else:
7577
      msg = "single-master"
7578
    self.feedback_fn("* changing disks into %s mode" % msg)
7579
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7580
                                           self.instance.disks,
7581
                                           self.instance.name, multimaster)
7582
    for node, nres in result.items():
7583
      nres.Raise("Cannot change disks config on node %s" % node)
7584

    
7585
  def _ExecCleanup(self):
7586
    """Try to cleanup after a failed migration.
7587

7588
    The cleanup is done by:
7589
      - check that the instance is running only on one node
7590
        (and update the config if needed)
7591
      - change disks on its secondary node to secondary
7592
      - wait until disks are fully synchronized
7593
      - disconnect from the network
7594
      - change disks into single-master mode
7595
      - wait again until disks are fully synchronized
7596

7597
    """
7598
    instance = self.instance
7599
    target_node = self.target_node
7600
    source_node = self.source_node
7601

    
7602
    # check running on only one node
7603
    self.feedback_fn("* checking where the instance actually runs"
7604
                     " (if this hangs, the hypervisor might be in"
7605
                     " a bad state)")
7606
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7607
    for node, result in ins_l.items():
7608
      result.Raise("Can't contact node %s" % node)
7609

    
7610
    runningon_source = instance.name in ins_l[source_node].payload
7611
    runningon_target = instance.name in ins_l[target_node].payload
7612

    
7613
    if runningon_source and runningon_target:
7614
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7615
                               " or the hypervisor is confused; you will have"
7616
                               " to ensure manually that it runs only on one"
7617
                               " and restart this operation")
7618

    
7619
    if not (runningon_source or runningon_target):
7620
      raise errors.OpExecError("Instance does not seem to be running at all;"
7621
                               " in this case it's safer to repair by"
7622
                               " running 'gnt-instance stop' to ensure disk"
7623
                               " shutdown, and then restarting it")
7624

    
7625
    if runningon_target:
7626
      # the migration has actually succeeded, we need to update the config
7627
      self.feedback_fn("* instance running on secondary node (%s),"
7628
                       " updating config" % target_node)
7629
      instance.primary_node = target_node
7630
      self.cfg.Update(instance, self.feedback_fn)
7631
      demoted_node = source_node
7632
    else:
7633
      self.feedback_fn("* instance confirmed to be running on its"
7634
                       " primary node (%s)" % source_node)
7635
      demoted_node = target_node
7636

    
7637
    if instance.disk_template in constants.DTS_INT_MIRROR:
7638
      self._EnsureSecondary(demoted_node)
7639
      try:
7640
        self._WaitUntilSync()
7641
      except errors.OpExecError:
7642
        # we ignore errors here, since if the device is standalone, it
        # won't be able to sync
7644
        pass
7645
      self._GoStandalone()
7646
      self._GoReconnect(False)
7647
      self._WaitUntilSync()
7648

    
7649
    self.feedback_fn("* done")
7650

    
7651
  def _RevertDiskStatus(self):
7652
    """Try to revert the disk status after a failed migration.
7653

7654
    """
7655
    target_node = self.target_node
7656
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7657
      return
7658

    
7659
    try:
7660
      self._EnsureSecondary(target_node)
7661
      self._GoStandalone()
7662
      self._GoReconnect(False)
7663
      self._WaitUntilSync()
7664
    except errors.OpExecError, err:
7665
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7666
                         " please try to recover the instance manually;"
7667
                         " error '%s'" % str(err))
7668

    
7669
  def _AbortMigration(self):
7670
    """Call the hypervisor code to abort a started migration.
7671

7672
    """
7673
    instance = self.instance
7674
    target_node = self.target_node
7675
    source_node = self.source_node
7676
    migration_info = self.migration_info
7677

    
7678
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7679
                                                                 instance,
7680
                                                                 migration_info,
7681
                                                                 False)
7682
    abort_msg = abort_result.fail_msg
7683
    if abort_msg:
7684
      logging.error("Aborting migration failed on target node %s: %s",
7685
                    target_node, abort_msg)
7686
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
7688

    
7689
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7690
        instance, False, self.live)
7691
    abort_msg = abort_result.fail_msg
7692
    if abort_msg:
7693
      logging.error("Aborting migration failed on source node %s: %s",
7694
                    source_node, abort_msg)
7695

    
7696
  def _ExecMigration(self):
7697
    """Migrate an instance.
7698

7699
    The migrate is done by:
7700
      - change the disks into dual-master mode
7701
      - wait until disks are fully synchronized again
7702
      - migrate the instance
7703
      - change disks on the new secondary node (the old primary) to secondary
7704
      - wait until disks are fully synchronized
7705
      - change disks into single-master mode
7706

7707
    """
7708
    instance = self.instance
7709
    target_node = self.target_node
7710
    source_node = self.source_node
7711

    
7712
    # Check for hypervisor version mismatch and warn the user.
7713
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7714
                                       None, [self.instance.hypervisor])
7715
    for ninfo in nodeinfo.values():
7716
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
7717
                  ninfo.node)
7718
    (_, _, (src_info, )) = nodeinfo[source_node].payload
7719
    (_, _, (dst_info, )) = nodeinfo[target_node].payload
7720

    
7721
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
7722
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
7723
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
7724
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
7725
      if src_version != dst_version:
7726
        self.feedback_fn("* warning: hypervisor version mismatch between"
7727
                         " source (%s) and target (%s) node" %
7728
                         (src_version, dst_version))
7729

    
7730
    self.feedback_fn("* checking disk consistency between source and target")
7731
    for dev in instance.disks:
7732
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7733
        raise errors.OpExecError("Disk %s is degraded or not fully"
7734
                                 " synchronized on target node,"
7735
                                 " aborting migration" % dev.iv_name)
7736

    
7737
    # First get the migration information from the remote node
7738
    result = self.rpc.call_migration_info(source_node, instance)
7739
    msg = result.fail_msg
7740
    if msg:
7741
      log_err = ("Failed fetching source migration information from %s: %s" %
7742
                 (source_node, msg))
7743
      logging.error(log_err)
7744
      raise errors.OpExecError(log_err)
7745

    
7746
    self.migration_info = migration_info = result.payload
7747

    
7748
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7749
      # Then switch the disks to master/master mode
7750
      self._EnsureSecondary(target_node)
7751
      self._GoStandalone()
7752
      self._GoReconnect(True)
7753
      self._WaitUntilSync()
7754

    
7755
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7756
    result = self.rpc.call_accept_instance(target_node,
7757
                                           instance,
7758
                                           migration_info,
7759
                                           self.nodes_ip[target_node])
7760

    
7761
    msg = result.fail_msg
7762
    if msg:
7763
      logging.error("Instance pre-migration failed, trying to revert"
7764
                    " disk status: %s", msg)
7765
      self.feedback_fn("Pre-migration failed, aborting")
7766
      self._AbortMigration()
7767
      self._RevertDiskStatus()
7768
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7769
                               (instance.name, msg))
7770

    
7771
    self.feedback_fn("* migrating instance to %s" % target_node)
7772
    result = self.rpc.call_instance_migrate(source_node, instance,
7773
                                            self.nodes_ip[target_node],
7774
                                            self.live)
7775
    msg = result.fail_msg
7776
    if msg:
7777
      logging.error("Instance migration failed, trying to revert"
7778
                    " disk status: %s", msg)
7779
      self.feedback_fn("Migration failed, aborting")
7780
      self._AbortMigration()
7781
      self._RevertDiskStatus()
7782
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7783
                               (instance.name, msg))
7784

    
7785
    self.feedback_fn("* starting memory transfer")
7786
    last_feedback = time.time()
7787
    while True:
7788
      result = self.rpc.call_instance_get_migration_status(source_node,
7789
                                                           instance)
7790
      msg = result.fail_msg
7791
      ms = result.payload   # MigrationStatus instance
7792
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7793
        logging.error("Instance migration failed, trying to revert"
7794
                      " disk status: %s", msg)
7795
        self.feedback_fn("Migration failed, aborting")
7796
        self._AbortMigration()
7797
        self._RevertDiskStatus()
7798
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7799
                                 (instance.name, msg))
7800

    
7801
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7802
        self.feedback_fn("* memory transfer complete")
7803
        break
7804

    
7805
      if (utils.TimeoutExpired(last_feedback,
7806
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7807
          ms.transferred_ram is not None):
7808
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7809
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7810
        last_feedback = time.time()
7811

    
7812
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7813

    
7814
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7815
                                                           instance,
7816
                                                           True,
7817
                                                           self.live)
7818
    msg = result.fail_msg
7819
    if msg:
7820
      logging.error("Instance migration succeeded, but finalization failed"
7821
                    " on the source node: %s", msg)
7822
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7823
                               msg)
7824

    
7825
    instance.primary_node = target_node
7826

    
7827
    # distribute new instance config to the other nodes
7828
    self.cfg.Update(instance, self.feedback_fn)
7829

    
7830
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7831
                                                           instance,
7832
                                                           migration_info,
7833
                                                           True)
7834
    msg = result.fail_msg
7835
    if msg:
7836
      logging.error("Instance migration succeeded, but finalization failed"
7837
                    " on the target node: %s", msg)
7838
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7839
                               msg)
7840

    
7841
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7842
      self._EnsureSecondary(source_node)
7843
      self._WaitUntilSync()
7844
      self._GoStandalone()
7845
      self._GoReconnect(False)
7846
      self._WaitUntilSync()
7847

    
7848
    self.feedback_fn("* done")
7849

    
7850
  def _ExecFailover(self):
7851
    """Failover an instance.
7852

7853
    The failover is done by shutting it down on its present node and
7854
    starting it on the secondary.
7855

7856
    """
7857
    instance = self.instance
7858
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7859

    
7860
    source_node = instance.primary_node
7861
    target_node = self.target_node
7862

    
7863
    if instance.admin_state == constants.ADMINST_UP:
7864
      self.feedback_fn("* checking disk consistency between source and target")
7865
      for dev in instance.disks:
7866
        # for drbd, these are drbd over lvm
7867
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7868
          if primary_node.offline:
7869
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7870
                             " target node %s" %
7871
                             (primary_node.name, dev.iv_name, target_node))
7872
          elif not self.ignore_consistency:
7873
            raise errors.OpExecError("Disk %s is degraded on target node,"
7874
                                     " aborting failover" % dev.iv_name)
7875
    else:
7876
      self.feedback_fn("* not checking disk consistency as instance is not"
7877
                       " running")
7878

    
7879
    self.feedback_fn("* shutting down instance on source node")
7880
    logging.info("Shutting down instance %s on node %s",
7881
                 instance.name, source_node)
7882

    
7883
    result = self.rpc.call_instance_shutdown(source_node, instance,
7884
                                             self.shutdown_timeout)
7885
    msg = result.fail_msg
7886
    if msg:
7887
      if self.ignore_consistency or primary_node.offline:
7888
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7889
                           " proceeding anyway; please make sure node"
7890
                           " %s is down; error details: %s",
7891
                           instance.name, source_node, source_node, msg)
7892
      else:
7893
        raise errors.OpExecError("Could not shutdown instance %s on"
7894
                                 " node %s: %s" %
7895
                                 (instance.name, source_node, msg))
7896

    
7897
    self.feedback_fn("* deactivating the instance's disks on source node")
7898
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7899
      raise errors.OpExecError("Can't shut down the instance's disks")
7900

    
7901
    instance.primary_node = target_node
7902
    # distribute new instance config to the other nodes
7903
    self.cfg.Update(instance, self.feedback_fn)
7904

    
7905
    # Only start the instance if it's marked as up
7906
    if instance.admin_state == constants.ADMINST_UP:
7907
      self.feedback_fn("* activating the instance's disks on target node %s" %
7908
                       target_node)
7909
      logging.info("Starting instance %s on node %s",
7910
                   instance.name, target_node)
7911

    
7912
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7913
                                           ignore_secondaries=True)
7914
      if not disks_ok:
7915
        _ShutdownInstanceDisks(self.lu, instance)
7916
        raise errors.OpExecError("Can't activate the instance's disks")
7917

    
7918
      self.feedback_fn("* starting the instance on the target node %s" %
7919
                       target_node)
7920
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
7921
                                            False)
7922
      msg = result.fail_msg
7923
      if msg:
7924
        _ShutdownInstanceDisks(self.lu, instance)
7925
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7926
                                 (instance.name, target_node, msg))
7927

    
7928
  def Exec(self, feedback_fn):
7929
    """Perform the migration.
7930

7931
    """
7932
    self.feedback_fn = feedback_fn
7933
    self.source_node = self.instance.primary_node
7934

    
7935
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7936
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7937
      self.target_node = self.instance.secondary_nodes[0]
7938
      # Otherwise self.target_node has been populated either
7939
      # directly, or through an iallocator.
7940

    
7941
    self.all_nodes = [self.source_node, self.target_node]
7942
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7943
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7944

    
7945
    if self.failover:
7946
      feedback_fn("Failover instance %s" % self.instance.name)
7947
      self._ExecFailover()
7948
    else:
7949
      feedback_fn("Migrating instance %s" % self.instance.name)
7950

    
7951
      if self.cleanup:
7952
        return self._ExecCleanup()
7953
      else:
7954
        return self._ExecMigration()
7955

    
7956

    
7957
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device whose
      CreateOnSecondary() returns True
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7997

    
7998

    
7999
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload
8027

    
8028

    
8029
def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate logical volume names (one per extension) for the
  given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
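# Illustrative example (hypothetical IDs): for exts == [".disk0", ".disk1"]
# this returns something like
#   ["4ac03cd1-....disk0", "91c7f8be-....disk1"]
# i.e. a freshly generated unique ID prefixed to each extension.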
8040

    
8041

    
8042
def _ComputeLDParams(disk_template, disk_params):
  """Computes Logical Disk parameters from Disk Template parameters.

  @type disk_template: string
  @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
  @type disk_params: dict
  @param disk_params: disk template parameters; dict(template_name -> parameters)
  @rtype: list(dict)
  @return: a list of dicts, one for each node of the disk hierarchy. Each dict
    contains the LD parameters of the node. The tree is flattened in-order.

  """
  if disk_template not in constants.DISK_TEMPLATES:
    raise errors.ProgrammerError("Unknown disk template %s" % disk_template)

  result = list()
  dt_params = disk_params[disk_template]
  if disk_template == constants.DT_DRBD8:
    drbd_params = {
      constants.RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
      constants.BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
      constants.NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
      }

    drbd_params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
                       drbd_params)

    result.append(drbd_params)

    # data LV
    data_params = {
      constants.STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
      }
    data_params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
                       data_params)
    result.append(data_params)

    # metadata LV
    meta_params = {
      constants.STRIPES: dt_params[constants.DRBD_META_STRIPES],
      }
    meta_params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
                       meta_params)
    result.append(meta_params)

  elif (disk_template == constants.DT_FILE or
        disk_template == constants.DT_SHARED_FILE):
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])

  elif disk_template == constants.DT_PLAIN:
    params = {
      constants.STRIPES: dt_params[constants.LV_STRIPES],
      }
    params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
                       params)
    result.append(params)

  elif disk_template == constants.DT_BLOCK:
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])

  return result
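# Illustrative note (not in the original sources): for the drbd template the
# flattened result is
#   [<drbd8 device params>, <data LV params>, <metadata LV params>]
# which is exactly the order unpacked by _GenerateDiskTemplate via
#   drbd_params, data_params, meta_params = ld_params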
8107

    
8108

    
8109
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor, drbd_params, data_params,
                         meta_params):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]),
                          params=data_params)
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]),
                          params=meta_params)
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name, params=drbd_params)
  return drbd_dev
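# Illustrative sketch of the returned disk tree (not in the original sources):
#   LD_DRBD8 (size=size, logical_id=(primary, secondary, port, minors, secret))
#   +- LD_LV data volume (vgnames[0]/names[0], size=size)
#   +- LD_LV meta volume (vgnames[1]/names[1], size=DRBD_META_SIZE)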
8132

    
8133

    
8134
def _GenerateDiskTemplate(lu, template_name,
8135
                          instance_name, primary_node,
8136
                          secondary_nodes, disk_info,
8137
                          file_storage_dir, file_driver,
8138
                          base_index, feedback_fn, disk_params):
8139
  """Generate the entire disk layout for a given template type.
8140

8141
  """
8142
  #TODO: compute space requirements
8143

    
8144
  vgname = lu.cfg.GetVGName()
8145
  disk_count = len(disk_info)
8146
  disks = []
8147
  ld_params = _ComputeLDParams(template_name, disk_params)
8148
  if template_name == constants.DT_DISKLESS:
8149
    pass
8150
  elif template_name == constants.DT_PLAIN:
8151
    if len(secondary_nodes) != 0:
8152
      raise errors.ProgrammerError("Wrong template configuration")
8153

    
8154
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8155
                                      for i in range(disk_count)])
8156
    for idx, disk in enumerate(disk_info):
8157
      disk_index = idx + base_index
8158
      vg = disk.get(constants.IDISK_VG, vgname)
8159
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8160
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
8161
                              size=disk[constants.IDISK_SIZE],
8162
                              logical_id=(vg, names[idx]),
8163
                              iv_name="disk/%d" % disk_index,
8164
                              mode=disk[constants.IDISK_MODE],
8165
                              params=ld_params[0])
8166
      disks.append(disk_dev)
8167
  elif template_name == constants.DT_DRBD8:
8168
    drbd_params, data_params, meta_params = ld_params
8169
    if len(secondary_nodes) != 1:
8170
      raise errors.ProgrammerError("Wrong template configuration")
8171
    remote_node = secondary_nodes[0]
8172
    minors = lu.cfg.AllocateDRBDMinor(
8173
      [primary_node, remote_node] * len(disk_info), instance_name)
8174

    
8175
    names = []
8176
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8177
                                               for i in range(disk_count)]):
8178
      names.append(lv_prefix + "_data")
8179
      names.append(lv_prefix + "_meta")
8180
    for idx, disk in enumerate(disk_info):
8181
      disk_index = idx + base_index
8182
      data_vg = disk.get(constants.IDISK_VG, vgname)
8183
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
8184
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8185
                                      disk[constants.IDISK_SIZE],
8186
                                      [data_vg, meta_vg],
8187
                                      names[idx * 2:idx * 2 + 2],
8188
                                      "disk/%d" % disk_index,
8189
                                      minors[idx * 2], minors[idx * 2 + 1],
8190
                                      drbd_params, data_params, meta_params)
8191
      disk_dev.mode = disk[constants.IDISK_MODE]
8192
      disks.append(disk_dev)
8193
  elif template_name == constants.DT_FILE:
8194
    if len(secondary_nodes) != 0:
8195
      raise errors.ProgrammerError("Wrong template configuration")
8196

    
8197
    opcodes.RequireFileStorage()
8198

    
8199
    for idx, disk in enumerate(disk_info):
8200
      disk_index = idx + base_index
8201
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8202
                              size=disk[constants.IDISK_SIZE],
8203
                              iv_name="disk/%d" % disk_index,
8204
                              logical_id=(file_driver,
8205
                                          "%s/disk%d" % (file_storage_dir,
8206
                                                         disk_index)),
8207
                              mode=disk[constants.IDISK_MODE],
8208
                              params=ld_params[0])
8209
      disks.append(disk_dev)
8210
  elif template_name == constants.DT_SHARED_FILE:
8211
    if len(secondary_nodes) != 0:
8212
      raise errors.ProgrammerError("Wrong template configuration")
8213

    
8214
    opcodes.RequireSharedFileStorage()
8215

    
8216
    for idx, disk in enumerate(disk_info):
8217
      disk_index = idx + base_index
8218
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8219
                              size=disk[constants.IDISK_SIZE],
8220
                              iv_name="disk/%d" % disk_index,
8221
                              logical_id=(file_driver,
8222
                                          "%s/disk%d" % (file_storage_dir,
8223
                                                         disk_index)),
8224
                              mode=disk[constants.IDISK_MODE],
8225
                              params=ld_params[0])
8226
      disks.append(disk_dev)
8227
  elif template_name == constants.DT_BLOCK:
8228
    if len(secondary_nodes) != 0:
8229
      raise errors.ProgrammerError("Wrong template configuration")
8230

    
8231
    for idx, disk in enumerate(disk_info):
8232
      disk_index = idx + base_index
8233
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8234
                              size=disk[constants.IDISK_SIZE],
8235
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8236
                                          disk[constants.IDISK_ADOPT]),
8237
                              iv_name="disk/%d" % disk_index,
8238
                              mode=disk[constants.IDISK_MODE],
8239
                              params=ld_params[0])
8240
      disks.append(disk_dev)
8241

    
8242
  else:
8243
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8244
  return disks
8245

    
8246

    
8247
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
8252

    
8253

    
8254
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
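# Illustrative worked example (made-up numbers): after 30 seconds with 1024 MiB
# of 4096 MiB written, the average is 30.0 / 1024 seconds per unit, so
#   _CalcEta(30.0, 1024, 4096) == (4096 - 1024) * (30.0 / 1024) == 90.0
# i.e. roughly 90 seconds remaining.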


def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disks %d failed",
                     instance.name, idx)
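
# Rough sizing sketch (assuming the usual values of MAX_WIPE_CHUNK = 1024 MiB
# and MIN_WIPE_CHUNK_PERCENT = 10; see constants.py for the authoritative
# numbers): a 102400 MiB (100 GiB) disk gives
#   min(1024, 102400 / 100.0 * 10) = 1024 MiB
# per chunk, so the disk is wiped in 1 GiB blockdev_wipe requests.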


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + \
        payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
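
# Worked example (illustrative sizes): two 10240 MiB disks in volume group
# "xenvg" need {"xenvg": 20480} MiB free with DT_PLAIN, and
# {"xenvg": 20480 + 2 * DRBD_META_SIZE} == {"xenvg": 20736} MiB with DT_DRBD8,
# because every disk gets its own DRBD metadata volume.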


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8:
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
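
# For comparison with the per-VG variant above (illustrative size): a single
# 10240 MiB disk needs 10240 MiB with DT_PLAIN and
# 10240 + DRBD_META_SIZE == 10368 MiB with DT_DRBD8, while file-based,
# block and diskless templates consume no space in the volume group.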


def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in vm_nodes]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks: parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        self.op.file_driver not in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidental modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MAXMEM],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
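
    # Sketch of a successful run (hypothetical node names): for a mirrored
    # disk template the allocator returns two names, e.g.
    #   ial.result == ["node1.example.com", "node2.example.com"]
    # in which case result[0] becomes the primary and result[1] the secondary.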

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      minmem=self.be_full[constants.BE_MINMEM],
      maxmem=self.be_full[constants.BE_MAXMEM],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if int(ei_version) != constants.EXPORT_VERSION:
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
        # Compatibility for the old "memory" be param
        if name == constants.BE_MEMORY:
          if constants.BE_MAXMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MAXMEM] = value
          if constants.BE_MINMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MINMEM] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
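
    # Illustrative result (hypothetical paths): with a cluster file storage
    # directory of "/srv/ganeti/file-storage", an opcode file_storage_dir of
    # "webfarm" and an instance named "inst1.example.com", the final path is
    # "/srv/ganeti/file-storage/webfarm/inst1.example.com".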

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
    for param, value in self.op.beparams.iteritems():
      if value == constants.VALUE_AUTO:
        self.op.beparams[param] = default_beparams[param]
    objects.UpgradeBeParams(self.op.beparams)
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      if link == constants.VALUE_AUTO:
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)
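
    # Illustrative shape of a normalized entry in self.disks (hypothetical
    # values):
    #   {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw",
    #    constants.IDISK_VG: "xenvg", constants.IDISK_METAVG: "xenvg"}
    # plus constants.IDISK_ADOPT when adopting existing volumes.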

    if self.op.mode == constants.INSTANCE_IMPORT:
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

      snode = self.cfg.GetNodeInfo(self.op.snode)
      if pnode.group != snode.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    nodenames = [pnode.name] + self.secondaries

    # disk parameters (not customizable at instance or node level)
    # just use the primary node parameters, ignoring the secondary.
    self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MAXMEM],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)
9384

    
9385
  def Exec(self, feedback_fn):
9386
    """Create and add the instance to the cluster.
9387

9388
    """
9389
    instance = self.op.instance_name
9390
    pnode_name = self.pnode.name
9391

    
9392
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9393
                self.owned_locks(locking.LEVEL_NODE)), \
9394
      "Node locks differ from node resource locks"
9395

    
9396
    ht_kind = self.op.hypervisor
9397
    if ht_kind in constants.HTS_REQ_PORT:
9398
      network_port = self.cfg.AllocatePort()
9399
    else:
9400
      network_port = None
9401

    
9402
    disks = _GenerateDiskTemplate(self,
9403
                                  self.op.disk_template,
9404
                                  instance, pnode_name,
9405
                                  self.secondaries,
9406
                                  self.disks,
9407
                                  self.instance_file_storage_dir,
9408
                                  self.op.file_driver,
9409
                                  0,
9410
                                  feedback_fn,
9411
                                  self.diskparams)
9412

    
9413
    iobj = objects.Instance(name=instance, os=self.op.os_type,
9414
                            primary_node=pnode_name,
9415
                            nics=self.nics, disks=disks,
9416
                            disk_template=self.op.disk_template,
9417
                            admin_state=constants.ADMINST_DOWN,
9418
                            network_port=network_port,
9419
                            beparams=self.op.beparams,
9420
                            hvparams=self.op.hvparams,
9421
                            hypervisor=self.op.hypervisor,
9422
                            osparams=self.op.osparams,
9423
                            )
9424

    
9425
    if self.op.tags:
9426
      for tag in self.op.tags:
9427
        iobj.AddTag(tag)
9428

    
9429
    if self.adopt_disks:
9430
      if self.op.disk_template == constants.DT_PLAIN:
9431
        # rename LVs to the newly-generated names; we need to construct
9432
        # 'fake' LV disks with the old data, plus the new unique_id
9433
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9434
        rename_to = []
9435
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9436
          rename_to.append(t_dsk.logical_id)
9437
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9438
          self.cfg.SetDiskID(t_dsk, pnode_name)
9439
        result = self.rpc.call_blockdev_rename(pnode_name,
9440
                                               zip(tmp_disks, rename_to))
9441
        result.Raise("Failed to rename adoped LVs")
9442
    else:
9443
      feedback_fn("* creating instance disks...")
9444
      try:
9445
        _CreateDisks(self, iobj)
9446
      except errors.OpExecError:
9447
        self.LogWarning("Device creation failed, reverting...")
9448
        try:
9449
          _RemoveDisks(self, iobj)
9450
        finally:
9451
          self.cfg.ReleaseDRBDMinors(instance)
9452
          raise
9453

    
9454
    feedback_fn("adding instance %s to cluster config" % instance)
9455

    
9456
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9457

    
9458
    # Declare that we don't want to remove the instance lock anymore, as we've
9459
    # added the instance to the config
9460
    del self.remove_locks[locking.LEVEL_INSTANCE]
9461

    
9462
    if self.op.mode == constants.INSTANCE_IMPORT:
9463
      # Release unused nodes
9464
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9465
    else:
9466
      # Release all nodes
9467
      _ReleaseLocks(self, locking.LEVEL_NODE)
9468

    
9469
    disk_abort = False
9470
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9471
      feedback_fn("* wiping instance disks...")
9472
      try:
9473
        _WipeDisks(self, iobj)
9474
      except errors.OpExecError, err:
9475
        logging.exception("Wiping disks failed")
9476
        self.LogWarning("Wiping instance disks failed (%s)", err)
9477
        disk_abort = True
9478

    
9479
    if disk_abort:
9480
      # Something is already wrong with the disks, don't do anything else
9481
      pass
9482
    elif self.op.wait_for_sync:
9483
      disk_abort = not _WaitForSync(self, iobj)
9484
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9485
      # make sure the disks are not degraded (still sync-ing is ok)
9486
      feedback_fn("* checking mirrors status")
9487
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9488
    else:
9489
      disk_abort = False
9490

    
9491
    if disk_abort:
9492
      _RemoveDisks(self, iobj)
9493
      self.cfg.RemoveInstance(iobj.name)
9494
      # Make sure the instance lock gets removed
9495
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9496
      raise errors.OpExecError("There are some degraded disks for"
9497
                               " this instance")
9498

    
9499
    # Release all node resource locks
9500
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9501

    
9502
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9503
      if self.op.mode == constants.INSTANCE_CREATE:
9504
        if not self.op.no_install:
9505
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9506
                        not self.op.wait_for_sync)
9507
          if pause_sync:
9508
            feedback_fn("* pausing disk sync to install instance OS")
9509
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9510
                                                              iobj.disks, True)
9511
            for idx, success in enumerate(result.payload):
9512
              if not success:
9513
                logging.warn("pause-sync of instance %s for disk %d failed",
9514
                             instance, idx)
9515

    
9516
          feedback_fn("* running the instance OS create scripts...")
9517
          # FIXME: pass debug option from opcode to backend
9518
          os_add_result = \
9519
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9520
                                          self.op.debug_level)
9521
          if pause_sync:
9522
            feedback_fn("* resuming disk sync")
9523
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9524
                                                              iobj.disks, False)
9525
            for idx, success in enumerate(result.payload):
9526
              if not success:
9527
                logging.warn("resume-sync of instance %s for disk %d failed",
9528
                             instance, idx)
9529

    
9530
          os_add_result.Raise("Could not add os for instance %s"
9531
                              " on node %s" % (instance, pnode_name))
9532

    
9533
      elif self.op.mode == constants.INSTANCE_IMPORT:
9534
        feedback_fn("* running the instance OS import scripts...")
9535

    
9536
        transfers = []
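        # One DiskTransfer per source image: data is read from the image file
        # (IEIO_FILE) and piped into the import script of the matching new
        # disk (IEIO_SCRIPT); disks without a source image are skipped.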
9537

    
9538
        for idx, image in enumerate(self.src_images):
9539
          if not image:
9540
            continue
9541

    
9542
          # FIXME: pass debug option from opcode to backend
9543
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9544
                                             constants.IEIO_FILE, (image, ),
9545
                                             constants.IEIO_SCRIPT,
9546
                                             (iobj.disks[idx], idx),
9547
                                             None)
9548
          transfers.append(dt)
9549

    
9550
        import_result = \
9551
          masterd.instance.TransferInstanceData(self, feedback_fn,
9552
                                                self.op.src_node, pnode_name,
9553
                                                self.pnode.secondary_ip,
9554
                                                iobj, transfers)
9555
        if not compat.all(import_result):
9556
          self.LogWarning("Some disks for instance %s on node %s were not"
9557
                          " imported successfully" % (instance, pnode_name))
9558

    
9559
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9560
        feedback_fn("* preparing remote import...")
9561
        # The source cluster will stop the instance before attempting to make a
9562
        # connection. In some cases stopping an instance can take a long time,
9563
        # hence the shutdown timeout is added to the connection timeout.
9564
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9565
                           self.op.source_shutdown_timeout)
9566
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9567

    
9568
        assert iobj.primary_node == self.pnode.name
9569
        disk_results = \
9570
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9571
                                        self.source_x509_ca,
9572
                                        self._cds, timeouts)
9573
        if not compat.all(disk_results):
9574
          # TODO: Should the instance still be started, even if some disks
9575
          # failed to import (valid for local imports, too)?
9576
          self.LogWarning("Some disks for instance %s on node %s were not"
9577
                          " imported successfully" % (instance, pnode_name))
9578

    
9579
        # Run rename script on newly imported instance
9580
        assert iobj.name == instance
9581
        feedback_fn("Running rename script for %s" % instance)
9582
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9583
                                                   self.source_instance_name,
9584
                                                   self.op.debug_level)
9585
        if result.fail_msg:
9586
          self.LogWarning("Failed to run rename script for %s on node"
9587
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9588

    
9589
      else:
9590
        # also checked in the prereq part
9591
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9592
                                     % self.op.mode)
9593

    
9594
    assert not self.owned_locks(locking.LEVEL_NODE_RES)
9595

    
9596
    if self.op.start:
9597
      iobj.admin_state = constants.ADMINST_UP
9598
      self.cfg.Update(iobj, feedback_fn)
9599
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9600
      feedback_fn("* starting instance...")
9601
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9602
                                            False)
9603
      result.Raise("Could not start instance")
9604

    
9605
    return list(iobj.all_nodes)
9606

    
9607

    
9608
class LUInstanceConsole(NoHooksLU):
9609
  """Connect to an instance's console.
9610

9611
  This is somewhat special in that it returns the command line that
9612
  you need to run on the master node in order to connect to the
9613
  console.
9614

9615
  """
9616
  REQ_BGL = False
9617

    
9618
  def ExpandNames(self):
9619
    self.share_locks = _ShareAll()
9620
    self._ExpandAndLockInstance()
9621

    
9622
  def CheckPrereq(self):
9623
    """Check prerequisites.
9624

9625
    This checks that the instance is in the cluster.
9626

9627
    """
9628
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9629
    assert self.instance is not None, \
9630
      "Cannot retrieve locked instance %s" % self.op.instance_name
9631
    _CheckNodeOnline(self, self.instance.primary_node)
9632

    
9633
  def Exec(self, feedback_fn):
9634
    """Connect to the console of an instance
9635

9636
    """
9637
    instance = self.instance
9638
    node = instance.primary_node
9639

    
9640
    node_insts = self.rpc.call_instance_list([node],
9641
                                             [instance.hypervisor])[node]
9642
    node_insts.Raise("Can't get node information from %s" % node)
9643

    
9644
    if instance.name not in node_insts.payload:
9645
      if instance.admin_state == constants.ADMINST_UP:
9646
        state = constants.INSTST_ERRORDOWN
9647
      elif instance.admin_state == constants.ADMINST_DOWN:
9648
        state = constants.INSTST_ADMINDOWN
9649
      else:
9650
        state = constants.INSTST_ADMINOFFLINE
9651
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9652
                               (instance.name, state))
9653

    
9654
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9655

    
9656
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9657

    
9658

    
9659
def _GetInstanceConsole(cluster, instance):
9660
  """Returns console information for an instance.
9661

9662
  @type cluster: L{objects.Cluster}
9663
  @type instance: L{objects.Instance}
9664
  @rtype: dict
9665

9666
  """
9667
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9668
  # beparams and hvparams are passed separately, to avoid editing the
9669
  # instance and then saving the defaults in the instance itself.
9670
  hvparams = cluster.FillHV(instance)
9671
  beparams = cluster.FillBE(instance)
9672
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9673

    
9674
  assert console.instance == instance.name
9675
  assert console.Validate()
9676

    
9677
  return console.ToDict()
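  # Illustrative note: the dict produced by console.ToDict() is what the
  # console client code consumes; its exact keys depend on the hypervisor's
  # GetInstanceConsole() implementation (e.g. an SSH command line or VNC
  # connection details), so no particular layout is assumed here.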
9678

    
9679

    
9680
class LUInstanceReplaceDisks(LogicalUnit):
9681
  """Replace the disks of an instance.
9682

9683
  """
9684
  HPATH = "mirrors-replace"
9685
  HTYPE = constants.HTYPE_INSTANCE
9686
  REQ_BGL = False
9687

    
9688
  def CheckArguments(self):
9689
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9690
                                  self.op.iallocator)
9691

    
9692
  def ExpandNames(self):
9693
    self._ExpandAndLockInstance()
9694

    
9695
    assert locking.LEVEL_NODE not in self.needed_locks
9696
    assert locking.LEVEL_NODE_RES not in self.needed_locks
9697
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9698

    
9699
    assert self.op.iallocator is None or self.op.remote_node is None, \
9700
      "Conflicting options"
9701

    
9702
    if self.op.remote_node is not None:
9703
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9704

    
9705
      # Warning: do not remove the locking of the new secondary here
9706
      # unless DRBD8.AddChildren is changed to work in parallel;
9707
      # currently it doesn't since parallel invocations of
9708
      # FindUnusedMinor will conflict
9709
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9710
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9711
    else:
9712
      self.needed_locks[locking.LEVEL_NODE] = []
9713
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9714

    
9715
      if self.op.iallocator is not None:
9716
        # iallocator will select a new node in the same group
9717
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9718

    
9719
    self.needed_locks[locking.LEVEL_NODE_RES] = []
9720

    
9721
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9722
                                   self.op.iallocator, self.op.remote_node,
9723
                                   self.op.disks, False, self.op.early_release)
9724

    
9725
    self.tasklets = [self.replacer]
9726

    
9727
  def DeclareLocks(self, level):
9728
    if level == locking.LEVEL_NODEGROUP:
9729
      assert self.op.remote_node is None
9730
      assert self.op.iallocator is not None
9731
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9732

    
9733
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9734
      # Lock all groups used by instance optimistically; this requires going
9735
      # via the node before it's locked, requiring verification later on
9736
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9737
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9738

    
9739
    elif level == locking.LEVEL_NODE:
9740
      if self.op.iallocator is not None:
9741
        assert self.op.remote_node is None
9742
        assert not self.needed_locks[locking.LEVEL_NODE]
9743

    
9744
        # Lock member nodes of all locked groups
9745
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9746
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9747
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9748
      else:
9749
        self._LockInstancesNodes()
9750
    elif level == locking.LEVEL_NODE_RES:
9751
      # Reuse node locks
9752
      self.needed_locks[locking.LEVEL_NODE_RES] = \
9753
        self.needed_locks[locking.LEVEL_NODE]
9754

    
9755
  def BuildHooksEnv(self):
9756
    """Build hooks env.
9757

9758
    This runs on the master, the primary and all the secondaries.
9759

9760
    """
9761
    instance = self.replacer.instance
9762
    env = {
9763
      "MODE": self.op.mode,
9764
      "NEW_SECONDARY": self.op.remote_node,
9765
      "OLD_SECONDARY": instance.secondary_nodes[0],
9766
      }
9767
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9768
    return env
9769

    
9770
  def BuildHooksNodes(self):
9771
    """Build hooks nodes.
9772

9773
    """
9774
    instance = self.replacer.instance
9775
    nl = [
9776
      self.cfg.GetMasterNode(),
9777
      instance.primary_node,
9778
      ]
9779
    if self.op.remote_node is not None:
9780
      nl.append(self.op.remote_node)
9781
    return nl, nl
9782

    
9783
  def CheckPrereq(self):
9784
    """Check prerequisites.
9785

9786
    """
9787
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9788
            self.op.iallocator is None)
9789

    
9790
    # Verify if node group locks are still correct
9791
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9792
    if owned_groups:
9793
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9794

    
9795
    return LogicalUnit.CheckPrereq(self)
9796

    
9797

    
9798
class TLReplaceDisks(Tasklet):
9799
  """Replaces disks for an instance.
9800

9801
  Note: Locking is not within the scope of this class.
9802

9803
  """
9804
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9805
               disks, delay_iallocator, early_release):
9806
    """Initializes this class.
9807

9808
    """
9809
    Tasklet.__init__(self, lu)
9810

    
9811
    # Parameters
9812
    self.instance_name = instance_name
9813
    self.mode = mode
9814
    self.iallocator_name = iallocator_name
9815
    self.remote_node = remote_node
9816
    self.disks = disks
9817
    self.delay_iallocator = delay_iallocator
9818
    self.early_release = early_release
9819

    
9820
    # Runtime data
9821
    self.instance = None
9822
    self.new_node = None
9823
    self.target_node = None
9824
    self.other_node = None
9825
    self.remote_node_info = None
9826
    self.node_secondary_ip = None
9827

    
9828
  @staticmethod
9829
  def CheckArguments(mode, remote_node, iallocator):
9830
    """Helper function for users of this class.
9831

9832
    """
9833
    # check for valid parameter combination
9834
    if mode == constants.REPLACE_DISK_CHG:
9835
      if remote_node is None and iallocator is None:
9836
        raise errors.OpPrereqError("When changing the secondary either an"
9837
                                   " iallocator script must be used or the"
9838
                                   " new node given", errors.ECODE_INVAL)
9839

    
9840
      if remote_node is not None and iallocator is not None:
9841
        raise errors.OpPrereqError("Give either the iallocator or the new"
9842
                                   " secondary, not both", errors.ECODE_INVAL)
9843

    
9844
    elif remote_node is not None or iallocator is not None:
9845
      # Not replacing the secondary
9846
      raise errors.OpPrereqError("The iallocator and new node options can"
9847
                                 " only be used when changing the"
9848
                                 " secondary node", errors.ECODE_INVAL)
9849

    
9850
  @staticmethod
9851
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9852
    """Compute a new secondary node using an IAllocator.
9853

9854
    """
9855
    ial = IAllocator(lu.cfg, lu.rpc,
9856
                     mode=constants.IALLOCATOR_MODE_RELOC,
9857
                     name=instance_name,
9858
                     relocate_from=list(relocate_from))
9859

    
9860
    ial.Run(iallocator_name)
9861

    
9862
    if not ial.success:
9863
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9864
                                 " %s" % (iallocator_name, ial.info),
9865
                                 errors.ECODE_NORES)
9866

    
9867
    if len(ial.result) != ial.required_nodes:
9868
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9869
                                 " of nodes (%s), required %s" %
9870
                                 (iallocator_name,
9871
                                  len(ial.result), ial.required_nodes),
9872
                                 errors.ECODE_FAULT)
9873

    
9874
    remote_node_name = ial.result[0]
9875

    
9876
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9877
               instance_name, remote_node_name)
9878

    
9879
    return remote_node_name
9880

    
9881
  def _FindFaultyDisks(self, node_name):
9882
    """Wrapper for L{_FindFaultyInstanceDisks}.
9883

9884
    """
9885
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9886
                                    node_name, True)
9887

    
9888
  def _CheckDisksActivated(self, instance):
9889
    """Checks if the instance disks are activated.
9890

9891
    @param instance: The instance to check disks
9892
    @return: True if they are activated, False otherwise
9893

9894
    """
9895
    nodes = instance.all_nodes
9896

    
9897
    for idx, dev in enumerate(instance.disks):
9898
      for node in nodes:
9899
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9900
        self.cfg.SetDiskID(dev, node)
9901

    
9902
        result = self.rpc.call_blockdev_find(node, dev)
9903

    
9904
        if result.offline:
9905
          continue
9906
        elif result.fail_msg or not result.payload:
9907
          return False
9908

    
9909
    return True
9910

    
9911
  def CheckPrereq(self):
9912
    """Check prerequisites.
9913

9914
    This checks that the instance is in the cluster.
9915

9916
    """
9917
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9918
    assert instance is not None, \
9919
      "Cannot retrieve locked instance %s" % self.instance_name
9920

    
9921
    if instance.disk_template != constants.DT_DRBD8:
9922
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9923
                                 " instances", errors.ECODE_INVAL)
9924

    
9925
    if len(instance.secondary_nodes) != 1:
9926
      raise errors.OpPrereqError("The instance has a strange layout,"
9927
                                 " expected one secondary but found %d" %
9928
                                 len(instance.secondary_nodes),
9929
                                 errors.ECODE_FAULT)
9930

    
9931
    if not self.delay_iallocator:
9932
      self._CheckPrereq2()
9933

    
9934
  def _CheckPrereq2(self):
9935
    """Check prerequisites, second part.
9936

9937
    This function should always be part of CheckPrereq. It was separated and is
9938
    now called from Exec because during node evacuation iallocator was only
9939
    called with an unmodified cluster model, not taking planned changes into
9940
    account.
9941

9942
    """
9943
    instance = self.instance
9944
    secondary_node = instance.secondary_nodes[0]
9945

    
9946
    if self.iallocator_name is None:
9947
      remote_node = self.remote_node
9948
    else:
9949
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9950
                                       instance.name, instance.secondary_nodes)
9951

    
9952
    if remote_node is None:
9953
      self.remote_node_info = None
9954
    else:
9955
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9956
             "Remote node '%s' is not locked" % remote_node
9957

    
9958
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9959
      assert self.remote_node_info is not None, \
9960
        "Cannot retrieve locked node %s" % remote_node
9961

    
9962
    if remote_node == self.instance.primary_node:
9963
      raise errors.OpPrereqError("The specified node is the primary node of"
9964
                                 " the instance", errors.ECODE_INVAL)
9965

    
9966
    if remote_node == secondary_node:
9967
      raise errors.OpPrereqError("The specified node is already the"
9968
                                 " secondary node of the instance",
9969
                                 errors.ECODE_INVAL)
9970

    
9971
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9972
                                    constants.REPLACE_DISK_CHG):
9973
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9974
                                 errors.ECODE_INVAL)
9975

    
9976
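    # In "auto" mode the side holding faulty disks (if any) becomes the target
    # node and only those disks are replaced; faults on both sides are
    # rejected, and no faults at all means there is nothing to do.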
    if self.mode == constants.REPLACE_DISK_AUTO:
9977
      if not self._CheckDisksActivated(instance):
9978
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9979
                                   " first" % self.instance_name,
9980
                                   errors.ECODE_STATE)
9981
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9982
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9983

    
9984
      if faulty_primary and faulty_secondary:
9985
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9986
                                   " one node and can not be repaired"
9987
                                   " automatically" % self.instance_name,
9988
                                   errors.ECODE_STATE)
9989

    
9990
      if faulty_primary:
9991
        self.disks = faulty_primary
9992
        self.target_node = instance.primary_node
9993
        self.other_node = secondary_node
9994
        check_nodes = [self.target_node, self.other_node]
9995
      elif faulty_secondary:
9996
        self.disks = faulty_secondary
9997
        self.target_node = secondary_node
9998
        self.other_node = instance.primary_node
9999
        check_nodes = [self.target_node, self.other_node]
10000
      else:
10001
        self.disks = []
10002
        check_nodes = []
10003

    
10004
    else:
10005
      # Non-automatic modes
10006
      if self.mode == constants.REPLACE_DISK_PRI:
10007
        self.target_node = instance.primary_node
10008
        self.other_node = secondary_node
10009
        check_nodes = [self.target_node, self.other_node]
10010

    
10011
      elif self.mode == constants.REPLACE_DISK_SEC:
10012
        self.target_node = secondary_node
10013
        self.other_node = instance.primary_node
10014
        check_nodes = [self.target_node, self.other_node]
10015

    
10016
      elif self.mode == constants.REPLACE_DISK_CHG:
10017
        self.new_node = remote_node
10018
        self.other_node = instance.primary_node
10019
        self.target_node = secondary_node
10020
        check_nodes = [self.new_node, self.other_node]
10021

    
10022
        _CheckNodeNotDrained(self.lu, remote_node)
10023
        _CheckNodeVmCapable(self.lu, remote_node)
10024

    
10025
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
10026
        assert old_node_info is not None
10027
        if old_node_info.offline and not self.early_release:
10028
          # doesn't make sense to delay the release
10029
          self.early_release = True
10030
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10031
                          " early-release mode", secondary_node)
10032

    
10033
      else:
10034
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10035
                                     self.mode)
10036

    
10037
      # If not specified all disks should be replaced
10038
      if not self.disks:
10039
        self.disks = range(len(self.instance.disks))
10040

    
10041
    # TODO: compute disk parameters
10042
    primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10043
    secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10044
    if primary_node_info.group != secondary_node_info.group:
10045
      self.lu.LogInfo("The instance primary and secondary nodes are in two"
10046
                      " different node groups; the disk parameters of the"
10047
                      " primary node's group will be applied.")
10048

    
10049
    self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10050

    
10051
    for node in check_nodes:
10052
      _CheckNodeOnline(self.lu, node)
10053

    
10054
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
10055
                                                          self.other_node,
10056
                                                          self.target_node]
10057
                              if node_name is not None)
10058

    
10059
    # Release unneeded node and node resource locks
10060
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10061
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10062

    
10063
    # Release any owned node group
10064
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10065
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10066

    
10067
    # Check whether disks are valid
10068
    for disk_idx in self.disks:
10069
      instance.FindDisk(disk_idx)
10070

    
10071
    # Get secondary node IP addresses
10072
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10073
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
10074

    
10075
  def Exec(self, feedback_fn):
10076
    """Execute disk replacement.
10077

10078
    This dispatches the disk replacement to the appropriate handler.
10079

10080
    """
10081
    if self.delay_iallocator:
10082
      self._CheckPrereq2()
10083

    
10084
    if __debug__:
10085
      # Verify owned locks before starting operation
10086
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10087
      assert set(owned_nodes) == set(self.node_secondary_ip), \
10088
          ("Incorrect node locks, owning %s, expected %s" %
10089
           (owned_nodes, self.node_secondary_ip.keys()))
10090
      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10091
              self.lu.owned_locks(locking.LEVEL_NODE_RES))
10092

    
10093
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10094
      assert list(owned_instances) == [self.instance_name], \
10095
          "Instance '%s' not locked" % self.instance_name
10096

    
10097
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10098
          "Should not own any node group lock at this point"
10099

    
10100
    if not self.disks:
10101
      feedback_fn("No disks need replacement")
10102
      return
10103

    
10104
    feedback_fn("Replacing disk(s) %s for %s" %
10105
                (utils.CommaJoin(self.disks), self.instance.name))
10106

    
10107
    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10108

    
10109
    # Activate the instance disks if we're replacing them on a down instance
10110
    if activate_disks:
10111
      _StartInstanceDisks(self.lu, self.instance, True)
10112

    
10113
    try:
10114
      # Should we replace the secondary node?
10115
      if self.new_node is not None:
10116
        fn = self._ExecDrbd8Secondary
10117
      else:
10118
        fn = self._ExecDrbd8DiskOnly
10119

    
10120
      result = fn(feedback_fn)
10121
    finally:
10122
      # Deactivate the instance disks if we're replacing them on a
10123
      # down instance
10124
      if activate_disks:
10125
        _SafeShutdownInstanceDisks(self.lu, self.instance)
10126

    
10127
    assert not self.lu.owned_locks(locking.LEVEL_NODE)
10128

    
10129
    if __debug__:
10130
      # Verify owned locks
10131
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10132
      nodes = frozenset(self.node_secondary_ip)
10133
      assert ((self.early_release and not owned_nodes) or
10134
              (not self.early_release and not (set(owned_nodes) - nodes))), \
10135
        ("Not owning the correct locks, early_release=%s, owned=%r,"
10136
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
10137

    
10138
    return result
10139

    
10140
  def _CheckVolumeGroup(self, nodes):
10141
    self.lu.LogInfo("Checking volume groups")
10142

    
10143
    vgname = self.cfg.GetVGName()
10144

    
10145
    # Make sure volume group exists on all involved nodes
10146
    results = self.rpc.call_vg_list(nodes)
10147
    if not results:
10148
      raise errors.OpExecError("Can't list volume groups on the nodes")
10149

    
10150
    for node in nodes:
10151
      res = results[node]
10152
      res.Raise("Error checking node %s" % node)
10153
      if vgname not in res.payload:
10154
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
10155
                                 (vgname, node))
10156

    
10157
  def _CheckDisksExistence(self, nodes):
10158
    # Check disk existence
10159
    for idx, dev in enumerate(self.instance.disks):
10160
      if idx not in self.disks:
10161
        continue
10162

    
10163
      for node in nodes:
10164
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10165
        self.cfg.SetDiskID(dev, node)
10166

    
10167
        result = self.rpc.call_blockdev_find(node, dev)
10168

    
10169
        msg = result.fail_msg
10170
        if msg or not result.payload:
10171
          if not msg:
10172
            msg = "disk not found"
10173
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10174
                                   (idx, node, msg))
10175

    
10176
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10177
    for idx, dev in enumerate(self.instance.disks):
10178
      if idx not in self.disks:
10179
        continue
10180

    
10181
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10182
                      (idx, node_name))
10183

    
10184
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10185
                                   ldisk=ldisk):
10186
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10187
                                 " replace disks for instance %s" %
10188
                                 (node_name, self.instance.name))
10189

    
10190
  def _CreateNewStorage(self, node_name):
10191
    """Create new storage on the primary or secondary node.
10192

10193
    This is only used for same-node replaces, not for changing the
10194
    secondary node, hence we don't want to modify the existing disk.
10195

10196
    """
10197
    iv_names = {}
10198

    
10199
    for idx, dev in enumerate(self.instance.disks):
10200
      if idx not in self.disks:
10201
        continue
10202

    
10203
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10204

    
10205
      self.cfg.SetDiskID(dev, node_name)
10206

    
10207
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10208
      names = _GenerateUniqueNames(self.lu, lv_names)
10209

    
10210
      _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10211

    
10212
      vg_data = dev.children[0].logical_id[0]
10213
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10214
                             logical_id=(vg_data, names[0]), params=data_p)
10215
      vg_meta = dev.children[1].logical_id[0]
10216
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10217
                             logical_id=(vg_meta, names[1]), params=meta_p)
10218

    
10219
      new_lvs = [lv_data, lv_meta]
10220
      old_lvs = [child.Copy() for child in dev.children]
10221
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10222

    
10223
      # we pass force_create=True to force the LVM creation
10224
      for new_lv in new_lvs:
10225
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10226
                        _GetInstanceInfoText(self.instance), False)
10227

    
10228
    return iv_names
10229

    
10230
  def _CheckDevices(self, node_name, iv_names):
10231
    for name, (dev, _, _) in iv_names.iteritems():
10232
      self.cfg.SetDiskID(dev, node_name)
10233

    
10234
      result = self.rpc.call_blockdev_find(node_name, dev)
10235

    
10236
      msg = result.fail_msg
10237
      if msg or not result.payload:
10238
        if not msg:
10239
          msg = "disk not found"
10240
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
10241
                                 (name, msg))
10242

    
10243
      if result.payload.is_degraded:
10244
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
10245

    
10246
  def _RemoveOldStorage(self, node_name, iv_names):
10247
    for name, (_, old_lvs, _) in iv_names.iteritems():
10248
      self.lu.LogInfo("Remove logical volumes for %s" % name)
10249

    
10250
      for lv in old_lvs:
10251
        self.cfg.SetDiskID(lv, node_name)
10252

    
10253
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10254
        if msg:
10255
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
10256
                             hint="remove unused LVs manually")
10257

    
10258
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10259
    """Replace a disk on the primary or secondary for DRBD 8.
10260

10261
    The algorithm for replace is quite complicated:
10262

10263
      1. for each disk to be replaced:
10264

10265
        1. create new LVs on the target node with unique names
10266
        1. detach old LVs from the drbd device
10267
        1. rename old LVs to name_replaced.<time_t>
10268
        1. rename new LVs to old LVs
10269
        1. attach the new LVs (with the old names now) to the drbd device
10270

10271
      1. wait for sync across all devices
10272

10273
      1. for each modified disk:
10274

10275
        1. remove old LVs (which have the name name_replaced.<time_t>)
10276

10277
    Failures are not very well handled.
10278

10279
    """
10280
    steps_total = 6
10281

    
10282
    # Step: check device activation
10283
    self.lu.LogStep(1, steps_total, "Check device existence")
10284
    self._CheckDisksExistence([self.other_node, self.target_node])
10285
    self._CheckVolumeGroup([self.target_node, self.other_node])
10286

    
10287
    # Step: check other node consistency
10288
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10289
    self._CheckDisksConsistency(self.other_node,
10290
                                self.other_node == self.instance.primary_node,
10291
                                False)
10292

    
10293
    # Step: create new storage
10294
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10295
    iv_names = self._CreateNewStorage(self.target_node)
10296

    
10297
    # Step: for each lv, detach+rename*2+attach
10298
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10299
    for dev, old_lvs, new_lvs in iv_names.itervalues():
10300
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10301

    
10302
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10303
                                                     old_lvs)
10304
      result.Raise("Can't detach drbd from local storage on node"
10305
                   " %s for device %s" % (self.target_node, dev.iv_name))
10306
      #dev.children = []
10307
      #cfg.Update(instance)
10308

    
10309
      # ok, we created the new LVs, so now we know we have the needed
10310
      # storage; as such, we proceed on the target node to rename
10311
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10312
      # using the assumption that logical_id == physical_id (which in
10313
      # turn is the unique_id on that node)
10314

    
10315
      # FIXME(iustin): use a better name for the replaced LVs
10316
      temp_suffix = int(time.time())
10317
      ren_fn = lambda d, suff: (d.physical_id[0],
10318
                                d.physical_id[1] + "_replaced-%s" % suff)
10319

    
10320
      # Build the rename list based on what LVs exist on the node
10321
      rename_old_to_new = []
10322
      for to_ren in old_lvs:
10323
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10324
        if not result.fail_msg and result.payload:
10325
          # device exists
10326
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10327

    
10328
      self.lu.LogInfo("Renaming the old LVs on the target node")
10329
      result = self.rpc.call_blockdev_rename(self.target_node,
10330
                                             rename_old_to_new)
10331
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
10332

    
10333
      # Now we rename the new LVs to the old LVs
10334
      self.lu.LogInfo("Renaming the new LVs on the target node")
10335
      rename_new_to_old = [(new, old.physical_id)
10336
                           for old, new in zip(old_lvs, new_lvs)]
10337
      result = self.rpc.call_blockdev_rename(self.target_node,
10338
                                             rename_new_to_old)
10339
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
10340

    
10341
      # Intermediate steps of in memory modifications
10342
      for old, new in zip(old_lvs, new_lvs):
10343
        new.logical_id = old.logical_id
10344
        self.cfg.SetDiskID(new, self.target_node)
10345

    
10346
      # We need to modify old_lvs so that removal later removes the
10347
      # right LVs, not the newly added ones; note that old_lvs is a
10348
      # copy here
10349
      for disk in old_lvs:
10350
        disk.logical_id = ren_fn(disk, temp_suffix)
10351
        self.cfg.SetDiskID(disk, self.target_node)
10352

    
10353
      # Now that the new lvs have the old name, we can add them to the device
10354
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10355
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10356
                                                  new_lvs)
10357
      msg = result.fail_msg
10358
      if msg:
10359
        for new_lv in new_lvs:
10360
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
10361
                                               new_lv).fail_msg
10362
          if msg2:
10363
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10364
                               hint=("cleanup manually the unused logical"
10365
                                     "volumes"))
10366
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10367

    
10368
    cstep = itertools.count(5)
10369

    
10370
    if self.early_release:
10371
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10372
      self._RemoveOldStorage(self.target_node, iv_names)
10373
      # TODO: Check if releasing locks early still makes sense
10374
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10375
    else:
10376
      # Release all resource locks except those used by the instance
10377
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10378
                    keep=self.node_secondary_ip.keys())
10379

    
10380
    # Release all node locks while waiting for sync
10381
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10382

    
10383
    # TODO: Can the instance lock be downgraded here? Take the optional disk
10384
    # shutdown in the caller into consideration.
10385

    
10386
    # Wait for sync
10387
    # This can fail as the old devices are degraded and _WaitForSync
10388
    # does a combined result over all disks, so we don't check its return value
10389
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10390
    _WaitForSync(self.lu, self.instance)
10391

    
10392
    # Check all devices manually
10393
    self._CheckDevices(self.instance.primary_node, iv_names)
10394

    
10395
    # Step: remove old storage
10396
    if not self.early_release:
10397
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10398
      self._RemoveOldStorage(self.target_node, iv_names)
10399

    
10400
  def _ExecDrbd8Secondary(self, feedback_fn):
10401
    """Replace the secondary node for DRBD 8.
10402

10403
    The algorithm for replace is quite complicated:
10404
      - for all disks of the instance:
10405
        - create new LVs on the new node with same names
10406
        - shutdown the drbd device on the old secondary
10407
        - disconnect the drbd network on the primary
10408
        - create the drbd device on the new secondary
10409
        - network attach the drbd on the primary, using an artifice:
10410
          the drbd code for Attach() will connect to the network if it
10411
          finds a device which is connected to the good local disks but
10412
          not network enabled
10413
      - wait for sync across all devices
10414
      - remove all disks from the old secondary
10415

10416
    Failures are not very well handled.
10417

10418
    """
10419
    steps_total = 6
10420

    
10421
    pnode = self.instance.primary_node
10422

    
10423
    # Step: check device activation
10424
    self.lu.LogStep(1, steps_total, "Check device existence")
10425
    self._CheckDisksExistence([self.instance.primary_node])
10426
    self._CheckVolumeGroup([self.instance.primary_node])
10427

    
10428
    # Step: check other node consistency
10429
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10430
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
10431

    
10432
    # Step: create new storage
10433
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10434
    for idx, dev in enumerate(self.instance.disks):
10435
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10436
                      (self.new_node, idx))
10437
      # we pass force_create=True to force LVM creation
10438
      for new_lv in dev.children:
10439
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10440
                        _GetInstanceInfoText(self.instance), False)
10441

    
10442
    # Step 4: drbd minors and drbd setup changes
10443
    # after this, we must manually remove the drbd minors on both the
10444
    # error and the success paths
10445
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10446
    minors = self.cfg.AllocateDRBDMinor([self.new_node
10447
                                         for dev in self.instance.disks],
10448
                                        self.instance.name)
10449
    logging.debug("Allocated minors %r", minors)
10450

    
10451
    iv_names = {}
10452
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10453
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10454
                      (self.new_node, idx))
10455
      # create new devices on new_node; note that we create two IDs:
10456
      # one without port, so the drbd will be activated without
10457
      # networking information on the new node at this stage, and one
10458
      # with network, for the later activation in step 4
10459
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10460
      if self.instance.primary_node == o_node1:
10461
        p_minor = o_minor1
10462
      else:
10463
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10464
        p_minor = o_minor2
10465

    
10466
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10467
                      p_minor, new_minor, o_secret)
10468
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10469
                    p_minor, new_minor, o_secret)
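      # dev.logical_id for a DRBD8 disk is the 6-tuple
      # (node_a, node_b, port, minor_a, minor_b, secret); new_alone_id carries
      # port=None so the device comes up without networking on the new node,
      # while new_net_id keeps the original port for the network attach
      # performed later in this step.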
10470

    
10471
      iv_names[idx] = (dev, dev.children, new_net_id)
10472
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10473
                    new_net_id)
10474
      drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10475
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10476
                              logical_id=new_alone_id,
10477
                              children=dev.children,
10478
                              size=dev.size,
10479
                              params=drbd_params)
10480
      try:
10481
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10482
                              _GetInstanceInfoText(self.instance), False)
10483
      except errors.GenericError:
10484
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10485
        raise
10486

    
10487
    # We have new devices, shutdown the drbd on the old secondary
10488
    for idx, dev in enumerate(self.instance.disks):
10489
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10490
      self.cfg.SetDiskID(dev, self.target_node)
10491
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10492
      if msg:
10493
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10494
                           "node: %s" % (idx, msg),
10495
                           hint=("Please cleanup this device manually as"
10496
                                 " soon as possible"))
10497

    
10498
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10499
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10500
                                               self.instance.disks)[pnode]
10501

    
10502
    msg = result.fail_msg
10503
    if msg:
10504
      # detaches didn't succeed (unlikely)
10505
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10506
      raise errors.OpExecError("Can't detach the disks from the network on"
10507
                               " old node: %s" % (msg,))
10508

    
10509
    # if we managed to detach at least one, we update all the disks of
10510
    # the instance to point to the new secondary
10511
    self.lu.LogInfo("Updating instance configuration")
10512
    for dev, _, new_logical_id in iv_names.itervalues():
10513
      dev.logical_id = new_logical_id
10514
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10515

    
10516
    self.cfg.Update(self.instance, feedback_fn)
10517

    
10518
    # Release all node locks (the configuration has been updated)
10519
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10520

    
10521
    # and now perform the drbd attach
10522
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10523
                    " (standalone => connected)")
10524
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10525
                                            self.new_node],
10526
                                           self.node_secondary_ip,
10527
                                           self.instance.disks,
10528
                                           self.instance.name,
10529
                                           False)
10530
    for to_node, to_result in result.items():
10531
      msg = to_result.fail_msg
10532
      if msg:
10533
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10534
                           to_node, msg,
10535
                           hint=("please do a gnt-instance info to see the"
10536
                                 " status of disks"))
10537

    
10538
    cstep = itertools.count(5)
10539

    
10540
    if self.early_release:
10541
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10542
      self._RemoveOldStorage(self.target_node, iv_names)
10543
      # TODO: Check if releasing locks early still makes sense
10544
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10545
    else:
10546
      # Release all resource locks except those used by the instance
10547
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10548
                    keep=self.node_secondary_ip.keys())
10549

    
10550
    # TODO: Can the instance lock be downgraded here? Take the optional disk
10551
    # shutdown in the caller into consideration.
10552

    
10553
    # Wait for sync
10554
    # This can fail as the old devices are degraded and _WaitForSync
10555
    # does a combined result over all disks, so we don't check its return value
10556
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10557
    _WaitForSync(self.lu, self.instance)
10558

    
10559
    # Check all devices manually
10560
    self._CheckDevices(self.instance.primary_node, iv_names)
10561

    
10562
    # Step: remove old storage
10563
    if not self.early_release:
10564
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10565
      self._RemoveOldStorage(self.target_node, iv_names)
10566

    
10567

    
10568
class LURepairNodeStorage(NoHooksLU):
10569
  """Repairs the volume group on a node.
10570

10571
  """
10572
  REQ_BGL = False
10573

    
10574
  def CheckArguments(self):
10575
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10576

    
10577
    storage_type = self.op.storage_type
10578

    
10579
    if (constants.SO_FIX_CONSISTENCY not in
10580
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10581
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10582
                                 " repaired" % storage_type,
10583
                                 errors.ECODE_INVAL)
10584

    
10585
  def ExpandNames(self):
10586
    self.needed_locks = {
10587
      locking.LEVEL_NODE: [self.op.node_name],
10588
      }
10589

    
10590
  def _CheckFaultyDisks(self, instance, node_name):
10591
    """Ensure faulty disks abort the opcode or at least warn."""
10592
    try:
10593
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10594
                                  node_name, True):
10595
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10596
                                   " node '%s'" % (instance.name, node_name),
10597
                                   errors.ECODE_STATE)
10598
    except errors.OpPrereqError, err:
10599
      if self.op.ignore_consistency:
10600
        self.proc.LogWarning(str(err.args[0]))
10601
      else:
10602
        raise
10603

    
10604
  def CheckPrereq(self):
10605
    """Check prerequisites.
10606

10607
    """
10608
    # Check whether any instance on this node has faulty disks
10609
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10610
      if inst.admin_state != constants.ADMINST_UP:
10611
        continue
10612
      check_nodes = set(inst.all_nodes)
10613
      check_nodes.discard(self.op.node_name)
10614
      for inst_node_name in check_nodes:
10615
        self._CheckFaultyDisks(inst, inst_node_name)
10616

    
10617
  def Exec(self, feedback_fn):
10618
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10619
                (self.op.name, self.op.node_name))
10620

    
10621
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10622
    result = self.rpc.call_storage_execute(self.op.node_name,
10623
                                           self.op.storage_type, st_args,
10624
                                           self.op.name,
10625
                                           constants.SO_FIX_CONSISTENCY)
10626
    result.Raise("Failed to repair storage unit '%s' on %s" %
10627
                 (self.op.name, self.op.node_name))
10628

    
10629

    
10630
class LUNodeEvacuate(NoHooksLU):
10631
  """Evacuates instances off a list of nodes.
10632

10633
  """
10634
  REQ_BGL = False
10635

    
10636
  _MODE2IALLOCATOR = {
10637
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10638
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10639
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10640
    }
10641
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10642
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
10643
          constants.IALLOCATOR_NEVAC_MODES)
10644

    
10645
  def CheckArguments(self):
10646
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10647

    
10648
  def ExpandNames(self):
10649
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10650

    
10651
    if self.op.remote_node is not None:
10652
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10653
      assert self.op.remote_node
10654

    
10655
      if self.op.remote_node == self.op.node_name:
10656
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10657
                                   " secondary node", errors.ECODE_INVAL)
10658

    
10659
      if self.op.mode != constants.NODE_EVAC_SEC:
10660
        raise errors.OpPrereqError("Without the use of an iallocator only"
10661
                                   " secondary instances can be evacuated",
10662
                                   errors.ECODE_INVAL)
10663

    
10664
    # Declare locks
10665
    self.share_locks = _ShareAll()
10666
    self.needed_locks = {
10667
      locking.LEVEL_INSTANCE: [],
10668
      locking.LEVEL_NODEGROUP: [],
10669
      locking.LEVEL_NODE: [],
10670
      }
10671

    
10672
    # Determine nodes (via group) optimistically, needs verification once locks
10673
    # have been acquired
10674
    self.lock_nodes = self._DetermineNodes()
10675

    
10676
  def _DetermineNodes(self):
10677
    """Gets the list of nodes to operate on.
10678

10679
    """
10680
    if self.op.remote_node is None:
10681
      # Iallocator will choose any node(s) in the same group
10682
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10683
    else:
10684
      group_nodes = frozenset([self.op.remote_node])
10685

    
10686
    # Determine nodes to be locked
10687
    return set([self.op.node_name]) | group_nodes
10688

    
10689
  def _DetermineInstances(self):
10690
    """Builds list of instances to operate on.
10691

10692
    """
10693
    assert self.op.mode in constants.NODE_EVAC_MODES
10694

    
10695
    if self.op.mode == constants.NODE_EVAC_PRI:
10696
      # Primary instances only
10697
      inst_fn = _GetNodePrimaryInstances
10698
      assert self.op.remote_node is None, \
10699
        "Evacuating primary instances requires iallocator"
10700
    elif self.op.mode == constants.NODE_EVAC_SEC:
10701
      # Secondary instances only
10702
      inst_fn = _GetNodeSecondaryInstances
10703
    else:
10704
      # All instances
10705
      assert self.op.mode == constants.NODE_EVAC_ALL
10706
      inst_fn = _GetNodeInstances
10707
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10708
      # per instance
10709
      raise errors.OpPrereqError("Due to an issue with the iallocator"
10710
                                 " interface it is not possible to evacuate"
10711
                                 " all instances at once; specify explicitly"
10712
                                 " whether to evacuate primary or secondary"
10713
                                 " instances",
10714
                                 errors.ECODE_INVAL)
10715

    
10716
    return inst_fn(self.cfg, self.op.node_name)
10717

    
10718
  def DeclareLocks(self, level):
10719
    if level == locking.LEVEL_INSTANCE:
10720
      # Lock instances optimistically, needs verification once node and group
10721
      # locks have been acquired
10722
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10723
        set(i.name for i in self._DetermineInstances())
10724

    
10725
    elif level == locking.LEVEL_NODEGROUP:
10726
      # Lock node groups for all potential target nodes optimistically, needs
10727
      # verification once nodes have been acquired
10728
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10729
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10730

    
10731
    elif level == locking.LEVEL_NODE:
10732
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10733

    
10734
  def CheckPrereq(self):
10735
    # Verify locks
10736
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10737
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10738
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10739

    
10740
    need_nodes = self._DetermineNodes()
10741

    
10742
    if not owned_nodes.issuperset(need_nodes):
10743
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10744
                                 " locks were acquired, current nodes are"
10745
                                 " are '%s', used to be '%s'; retry the"
10746
                                 " operation" %
10747
                                 (self.op.node_name,
10748
                                  utils.CommaJoin(need_nodes),
10749
                                  utils.CommaJoin(owned_nodes)),
10750
                                 errors.ECODE_STATE)
10751

    
10752
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10753
    if owned_groups != wanted_groups:
10754
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10755
                               " current groups are '%s', used to be '%s';"
10756
                               " retry the operation" %
10757
                               (utils.CommaJoin(wanted_groups),
10758
                                utils.CommaJoin(owned_groups)))
10759

    
10760
    # Determine affected instances
10761
    self.instances = self._DetermineInstances()
10762
    self.instance_names = [i.name for i in self.instances]
10763

    
10764
    if set(self.instance_names) != owned_instances:
10765
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10766
                               " were acquired, current instances are '%s',"
10767
                               " used to be '%s'; retry the operation" %
10768
                               (self.op.node_name,
10769
                                utils.CommaJoin(self.instance_names),
10770
                                utils.CommaJoin(owned_instances)))
10771

    
10772
    if self.instance_names:
10773
      self.LogInfo("Evacuating instances from node '%s': %s",
10774
                   self.op.node_name,
10775
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10776
    else:
10777
      self.LogInfo("No instances to evacuate from node '%s'",
10778
                   self.op.node_name)
10779

    
10780
    if self.op.remote_node is not None:
10781
      for i in self.instances:
10782
        if i.primary_node == self.op.remote_node:
10783
          raise errors.OpPrereqError("Node %s is the primary node of"
10784
                                     " instance %s, cannot use it as"
10785
                                     " secondary" %
10786
                                     (self.op.remote_node, i.name),
10787
                                     errors.ECODE_INVAL)
10788

    
10789
  def Exec(self, feedback_fn):
10790
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10791

    
10792
    if not self.instance_names:
10793
      # No instances to evacuate
10794
      jobs = []
10795

    
10796
    elif self.op.iallocator is not None:
10797
      # TODO: Implement relocation to other group
10798
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10799
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10800
                       instances=list(self.instance_names))
10801

    
10802
      ial.Run(self.op.iallocator)
10803

    
10804
      if not ial.success:
10805
        raise errors.OpPrereqError("Can't compute node evacuation using"
10806
                                   " iallocator '%s': %s" %
10807
                                   (self.op.iallocator, ial.info),
10808
                                   errors.ECODE_NORES)
10809

    
10810
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10811

    
10812
    elif self.op.remote_node is not None:
10813
      assert self.op.mode == constants.NODE_EVAC_SEC
10814
      jobs = [
10815
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10816
                                        remote_node=self.op.remote_node,
10817
                                        disks=[],
10818
                                        mode=constants.REPLACE_DISK_CHG,
10819
                                        early_release=self.op.early_release)]
10820
        for instance_name in self.instance_names
10821
        ]
10822

    
10823
    else:
10824
      raise errors.ProgrammerError("No iallocator or remote node")
10825

    
10826
    return ResultWithJobs(jobs)
10827

    
10828

    
10829
def _SetOpEarlyRelease(early_release, op):
10830
  """Sets C{early_release} flag on opcodes if available.
10831

10832
  """
10833
  try:
10834
    op.early_release = early_release
10835
  except AttributeError:
10836
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10837

    
10838
  return op
10839

    
10840

    
10841
def _NodeEvacDest(use_nodes, group, nodes):
10842
  """Returns group or nodes depending on caller's choice.
10843

10844
  """
10845
  if use_nodes:
10846
    return utils.CommaJoin(nodes)
10847
  else:
10848
    return group
10849

    
10850

    
10851
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10852
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10853

10854
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10855
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10856

10857
  @type lu: L{LogicalUnit}
10858
  @param lu: Logical unit instance
10859
  @type alloc_result: tuple/list
10860
  @param alloc_result: Result from iallocator
10861
  @type early_release: bool
10862
  @param early_release: Whether to release locks early if possible
10863
  @type use_nodes: bool
10864
  @param use_nodes: Whether to display node names instead of groups
10865

10866
  """
10867
  (moved, failed, jobs) = alloc_result
10868

    
10869
  if failed:
10870
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10871
                                 for (name, reason) in failed)
10872
    lu.LogWarning("Unable to evacuate instances %s", failreason)
10873
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10874

    
10875
  if moved:
10876
    lu.LogInfo("Instances to be moved: %s",
10877
               utils.CommaJoin("%s (to %s)" %
10878
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10879
                               for (name, group, nodes) in moved))
10880

    
10881
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10882
              map(opcodes.OpCode.LoadOpCode, ops))
10883
          for ops in jobs]
10884

    
10885

    
10886
class LUInstanceGrowDisk(LogicalUnit):
10887
  """Grow a disk of an instance.
10888

10889
  """
10890
  HPATH = "disk-grow"
10891
  HTYPE = constants.HTYPE_INSTANCE
10892
  REQ_BGL = False
10893

    
10894
  def ExpandNames(self):
10895
    self._ExpandAndLockInstance()
10896
    self.needed_locks[locking.LEVEL_NODE] = []
10897
    self.needed_locks[locking.LEVEL_NODE_RES] = []
10898
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
10899

    
10900
  def DeclareLocks(self, level):
10901
    if level == locking.LEVEL_NODE:
10902
      self._LockInstancesNodes()
10903
    elif level == locking.LEVEL_NODE_RES:
10904
      # Copy node locks
10905
      self.needed_locks[locking.LEVEL_NODE_RES] = \
10906
        self.needed_locks[locking.LEVEL_NODE][:]
10907

    
10908
  def BuildHooksEnv(self):
10909
    """Build hooks env.
10910

10911
    This runs on the master, the primary and all the secondaries.
10912

10913
    """
10914
    env = {
10915
      "DISK": self.op.disk,
10916
      "AMOUNT": self.op.amount,
10917
      }
10918
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10919
    return env
10920

    
10921
  def BuildHooksNodes(self):
10922
    """Build hooks nodes.
10923

10924
    """
10925
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10926
    return (nl, nl)
10927

    
10928
  def CheckPrereq(self):
10929
    """Check prerequisites.
10930

10931
    This checks that the instance is in the cluster.
10932

10933
    """
10934
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10935
    assert instance is not None, \
10936
      "Cannot retrieve locked instance %s" % self.op.instance_name
10937
    nodenames = list(instance.all_nodes)
10938
    for node in nodenames:
10939
      _CheckNodeOnline(self, node)
10940

    
10941
    self.instance = instance
10942

    
10943
    if instance.disk_template not in constants.DTS_GROWABLE:
10944
      raise errors.OpPrereqError("Instance's disk layout does not support"
10945
                                 " growing", errors.ECODE_INVAL)
10946

    
10947
    self.disk = instance.FindDisk(self.op.disk)
10948

    
10949
    if instance.disk_template not in (constants.DT_FILE,
10950
                                      constants.DT_SHARED_FILE):
10951
      # TODO: check the free disk space for file, when that feature will be
10952
      # supported
10953
      _CheckNodesFreeDiskPerVG(self, nodenames,
10954
                               self.disk.ComputeGrowth(self.op.amount))
10955

    
10956
  def Exec(self, feedback_fn):
10957
    """Execute disk grow.
10958

10959
    """
10960
    instance = self.instance
10961
    disk = self.disk
10962

    
10963
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10964
    assert (self.owned_locks(locking.LEVEL_NODE) ==
10965
            self.owned_locks(locking.LEVEL_NODE_RES))
10966

    
10967
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10968
    if not disks_ok:
10969
      raise errors.OpExecError("Cannot activate block device to grow")
10970

    
10971
    feedback_fn("Growing disk %s of instance '%s' by %s" %
10972
                (self.op.disk, instance.name,
10973
                 utils.FormatUnit(self.op.amount, "h")))
10974

    
10975
    # First run all grow ops in dry-run mode
10976
    for node in instance.all_nodes:
10977
      self.cfg.SetDiskID(disk, node)
10978
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10979
      result.Raise("Grow request failed to node %s" % node)
10980

    
10981
    # We know that (as far as we can test) operations across different
10982
    # nodes will succeed, time to run it for real
10983
    for node in instance.all_nodes:
10984
      self.cfg.SetDiskID(disk, node)
10985
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10986
      result.Raise("Grow request failed to node %s" % node)
10987

    
10988
      # TODO: Rewrite code to work properly
10989
      # DRBD goes into sync mode for a short amount of time after executing the
10990
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10991
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10992
      # time is a work-around.
10993
      time.sleep(5)
10994

    
10995
    disk.RecordGrow(self.op.amount)
10996
    self.cfg.Update(instance, feedback_fn)
10997

    
10998
    # Changes have been recorded, release node lock
10999
    _ReleaseLocks(self, locking.LEVEL_NODE)
11000

    
11001
    # Downgrade lock while waiting for sync
11002
    self.glm.downgrade(locking.LEVEL_INSTANCE)
11003

    
11004
    if self.op.wait_for_sync:
11005
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
11006
      if disk_abort:
11007
        self.proc.LogWarning("Disk sync-ing has not returned a good"
11008
                             " status; please check the instance")
11009
      if instance.admin_state != constants.ADMINST_UP:
11010
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11011
    elif instance.admin_state != constants.ADMINST_UP:
11012
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
11013
                           " not supposed to be running because no wait for"
11014
                           " sync mode was requested")
11015

    
11016
    assert self.owned_locks(locking.LEVEL_NODE_RES)
11017
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11018

    
11019

    
11020
class LUInstanceQueryData(NoHooksLU):
11021
  """Query runtime instance data.
11022

11023
  """
11024
  REQ_BGL = False
11025

    
11026
  def ExpandNames(self):
11027
    self.needed_locks = {}
11028

    
11029
    # Use locking if requested or when non-static information is wanted
11030
    if not (self.op.static or self.op.use_locking):
11031
      self.LogWarning("Non-static data requested, locks need to be acquired")
11032
      self.op.use_locking = True
11033

    
11034
    if self.op.instances or not self.op.use_locking:
11035
      # Expand instance names right here
11036
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
11037
    else:
11038
      # Will use acquired locks
11039
      self.wanted_names = None
11040

    
11041
    if self.op.use_locking:
11042
      self.share_locks = _ShareAll()
11043

    
11044
      if self.wanted_names is None:
11045
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11046
      else:
11047
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11048

    
11049
      self.needed_locks[locking.LEVEL_NODE] = []
11050
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11051

    
11052
  def DeclareLocks(self, level):
11053
    if self.op.use_locking and level == locking.LEVEL_NODE:
11054
      self._LockInstancesNodes()
11055

    
11056
  def CheckPrereq(self):
11057
    """Check prerequisites.
11058

11059
    This only checks the optional instance list against the existing names.
11060

11061
    """
11062
    if self.wanted_names is None:
11063
      assert self.op.use_locking, "Locking was not used"
11064
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11065

    
11066
    self.wanted_instances = \
11067
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11068

    
11069
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
11070
    """Returns the status of a block device
11071

11072
    """
11073
    if self.op.static or not node:
11074
      return None
11075

    
11076
    self.cfg.SetDiskID(dev, node)
11077

    
11078
    result = self.rpc.call_blockdev_find(node, dev)
11079
    if result.offline:
11080
      return None
11081

    
11082
    result.Raise("Can't compute disk status for %s" % instance_name)
11083

    
11084
    status = result.payload
11085
    if status is None:
11086
      return None
11087

    
11088
    return (status.dev_path, status.major, status.minor,
11089
            status.sync_percent, status.estimated_time,
11090
            status.is_degraded, status.ldisk_status)
11091

    
11092
  def _ComputeDiskStatus(self, instance, snode, dev):
11093
    """Compute block device status.
11094

11095
    """
11096
    if dev.dev_type in constants.LDS_DRBD:
11097
      # we change the snode then (otherwise we use the one passed in)
11098
      if dev.logical_id[0] == instance.primary_node:
11099
        snode = dev.logical_id[1]
11100
      else:
11101
        snode = dev.logical_id[0]
11102

    
11103
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11104
                                              instance.name, dev)
11105
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11106

    
11107
    if dev.children:
11108
      dev_children = map(compat.partial(self._ComputeDiskStatus,
11109
                                        instance, snode),
11110
                         dev.children)
11111
    else:
11112
      dev_children = []
11113

    
11114
    return {
11115
      "iv_name": dev.iv_name,
11116
      "dev_type": dev.dev_type,
11117
      "logical_id": dev.logical_id,
11118
      "physical_id": dev.physical_id,
11119
      "pstatus": dev_pstatus,
11120
      "sstatus": dev_sstatus,
11121
      "children": dev_children,
11122
      "mode": dev.mode,
11123
      "size": dev.size,
11124
      }
11125

    
11126
  def Exec(self, feedback_fn):
11127
    """Gather and return data"""
11128
    result = {}
11129

    
11130
    cluster = self.cfg.GetClusterInfo()
11131

    
11132
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11133
                                          for i in self.wanted_instances)
11134
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11135
      if self.op.static or pnode.offline:
11136
        remote_state = None
11137
        if pnode.offline:
11138
          self.LogWarning("Primary node %s is marked offline, returning static"
11139
                          " information only for instance %s" %
11140
                          (pnode.name, instance.name))
11141
      else:
11142
        remote_info = self.rpc.call_instance_info(instance.primary_node,
11143
                                                  instance.name,
11144
                                                  instance.hypervisor)
11145
        remote_info.Raise("Error checking node %s" % instance.primary_node)
11146
        remote_info = remote_info.payload
11147
        if remote_info and "state" in remote_info:
11148
          remote_state = "up"
11149
        else:
11150
          if instance.admin_state == constants.ADMINST_UP:
11151
            remote_state = "down"
11152
          else:
11153
            remote_state = instance.admin_state
11154

    
11155
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11156
                  instance.disks)
11157

    
11158
      result[instance.name] = {
11159
        "name": instance.name,
11160
        "config_state": instance.admin_state,
11161
        "run_state": remote_state,
11162
        "pnode": instance.primary_node,
11163
        "snodes": instance.secondary_nodes,
11164
        "os": instance.os,
11165
        # this happens to be the same format used for hooks
11166
        "nics": _NICListToTuple(self, instance.nics),
11167
        "disk_template": instance.disk_template,
11168
        "disks": disks,
11169
        "hypervisor": instance.hypervisor,
11170
        "network_port": instance.network_port,
11171
        "hv_instance": instance.hvparams,
11172
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
11173
        "be_instance": instance.beparams,
11174
        "be_actual": cluster.FillBE(instance),
11175
        "os_instance": instance.osparams,
11176
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11177
        "serial_no": instance.serial_no,
11178
        "mtime": instance.mtime,
11179
        "ctime": instance.ctime,
11180
        "uuid": instance.uuid,
11181
        }
11182

    
11183
    return result
11184

    
11185

    
11186
class LUInstanceSetParams(LogicalUnit):
11187
  """Modifies an instances's parameters.
11188

11189
  """
11190
  HPATH = "instance-modify"
11191
  HTYPE = constants.HTYPE_INSTANCE
11192
  REQ_BGL = False
11193

    
11194
  def CheckArguments(self):
11195
    if not (self.op.nics or self.op.disks or self.op.disk_template or
11196
            self.op.hvparams or self.op.beparams or self.op.os_name or
11197
            self.op.online_inst or self.op.offline_inst):
11198
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11199

    
11200
    if self.op.hvparams:
11201
      _CheckGlobalHvParams(self.op.hvparams)
11202

    
11203
    # Disk validation
11204
    disk_addremove = 0
11205
    for disk_op, disk_dict in self.op.disks:
11206
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11207
      if disk_op == constants.DDM_REMOVE:
11208
        disk_addremove += 1
11209
        continue
11210
      elif disk_op == constants.DDM_ADD:
11211
        disk_addremove += 1
11212
      else:
11213
        if not isinstance(disk_op, int):
11214
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11215
        if not isinstance(disk_dict, dict):
11216
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11217
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11218

    
11219
      if disk_op == constants.DDM_ADD:
11220
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11221
        if mode not in constants.DISK_ACCESS_SET:
11222
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11223
                                     errors.ECODE_INVAL)
11224
        size = disk_dict.get(constants.IDISK_SIZE, None)
11225
        if size is None:
11226
          raise errors.OpPrereqError("Required disk parameter size missing",
11227
                                     errors.ECODE_INVAL)
11228
        try:
11229
          size = int(size)
11230
        except (TypeError, ValueError), err:
11231
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11232
                                     str(err), errors.ECODE_INVAL)
11233
        disk_dict[constants.IDISK_SIZE] = size
11234
      else:
11235
        # modification of disk
11236
        if constants.IDISK_SIZE in disk_dict:
11237
          raise errors.OpPrereqError("Disk size change not possible, use"
11238
                                     " grow-disk", errors.ECODE_INVAL)
11239

    
11240
    if disk_addremove > 1:
11241
      raise errors.OpPrereqError("Only one disk add or remove operation"
11242
                                 " supported at a time", errors.ECODE_INVAL)
11243

    
11244
    if self.op.disks and self.op.disk_template is not None:
11245
      raise errors.OpPrereqError("Disk template conversion and other disk"
11246
                                 " changes not supported at the same time",
11247
                                 errors.ECODE_INVAL)
11248

    
11249
    if (self.op.disk_template and
11250
        self.op.disk_template in constants.DTS_INT_MIRROR and
11251
        self.op.remote_node is None):
11252
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
11253
                                 " one requires specifying a secondary node",
11254
                                 errors.ECODE_INVAL)
11255

    
11256
    # NIC validation
11257
    nic_addremove = 0
11258
    for nic_op, nic_dict in self.op.nics:
11259
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11260
      if nic_op == constants.DDM_REMOVE:
11261
        nic_addremove += 1
11262
        continue
11263
      elif nic_op == constants.DDM_ADD:
11264
        nic_addremove += 1
11265
      else:
11266
        if not isinstance(nic_op, int):
11267
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11268
        if not isinstance(nic_dict, dict):
11269
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11270
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11271

    
11272
      # nic_dict should be a dict
11273
      nic_ip = nic_dict.get(constants.INIC_IP, None)
11274
      if nic_ip is not None:
11275
        if nic_ip.lower() == constants.VALUE_NONE:
11276
          nic_dict[constants.INIC_IP] = None
11277
        else:
11278
          if not netutils.IPAddress.IsValid(nic_ip):
11279
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11280
                                       errors.ECODE_INVAL)
11281

    
11282
      nic_bridge = nic_dict.get("bridge", None)
11283
      nic_link = nic_dict.get(constants.INIC_LINK, None)
11284
      if nic_bridge and nic_link:
11285
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11286
                                   " at the same time", errors.ECODE_INVAL)
11287
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11288
        nic_dict["bridge"] = None
11289
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11290
        nic_dict[constants.INIC_LINK] = None
11291

    
11292
      if nic_op == constants.DDM_ADD:
11293
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
11294
        if nic_mac is None:
11295
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11296

    
11297
      if constants.INIC_MAC in nic_dict:
11298
        nic_mac = nic_dict[constants.INIC_MAC]
11299
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11300
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11301

    
11302
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11303
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11304
                                     " modifying an existing nic",
11305
                                     errors.ECODE_INVAL)
11306

    
11307
    if nic_addremove > 1:
11308
      raise errors.OpPrereqError("Only one NIC add or remove operation"
11309
                                 " supported at a time", errors.ECODE_INVAL)
11310

    
11311
  def ExpandNames(self):
11312
    self._ExpandAndLockInstance()
11313
    # Can't even acquire node locks in shared mode as upcoming changes in
11314
    # Ganeti 2.6 will start to modify the node object on disk conversion
11315
    self.needed_locks[locking.LEVEL_NODE] = []
11316
    self.needed_locks[locking.LEVEL_NODE_RES] = []
11317
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11318

    
11319
  def DeclareLocks(self, level):
11320
    if level == locking.LEVEL_NODE:
11321
      self._LockInstancesNodes()
11322
      if self.op.disk_template and self.op.remote_node:
11323
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11324
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11325
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11326
      # Copy node locks
11327
      self.needed_locks[locking.LEVEL_NODE_RES] = \
11328
        self.needed_locks[locking.LEVEL_NODE][:]
11329

    
11330
  def BuildHooksEnv(self):
11331
    """Build hooks env.
11332

11333
    This runs on the master, primary and secondaries.
11334

11335
    """
11336
    args = dict()
11337
    if constants.BE_MINMEM in self.be_new:
11338
      args["minmem"] = self.be_new[constants.BE_MINMEM]
11339
    if constants.BE_MAXMEM in self.be_new:
11340
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11341
    if constants.BE_VCPUS in self.be_new:
11342
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
11343
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11344
    # information at all.
11345
    if self.op.nics:
11346
      args["nics"] = []
11347
      nic_override = dict(self.op.nics)
11348
      for idx, nic in enumerate(self.instance.nics):
11349
        if idx in nic_override:
11350
          this_nic_override = nic_override[idx]
11351
        else:
11352
          this_nic_override = {}
11353
        if constants.INIC_IP in this_nic_override:
11354
          ip = this_nic_override[constants.INIC_IP]
11355
        else:
11356
          ip = nic.ip
11357
        if constants.INIC_MAC in this_nic_override:
11358
          mac = this_nic_override[constants.INIC_MAC]
11359
        else:
11360
          mac = nic.mac
11361
        if idx in self.nic_pnew:
11362
          nicparams = self.nic_pnew[idx]
11363
        else:
11364
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11365
        mode = nicparams[constants.NIC_MODE]
11366
        link = nicparams[constants.NIC_LINK]
11367
        args["nics"].append((ip, mac, mode, link))
11368
      if constants.DDM_ADD in nic_override:
11369
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11370
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11371
        nicparams = self.nic_pnew[constants.DDM_ADD]
11372
        mode = nicparams[constants.NIC_MODE]
11373
        link = nicparams[constants.NIC_LINK]
11374
        args["nics"].append((ip, mac, mode, link))
11375
      elif constants.DDM_REMOVE in nic_override:
11376
        del args["nics"][-1]
11377

    
11378
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11379
    if self.op.disk_template:
11380
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11381

    
11382
    return env
11383

    
11384
  def BuildHooksNodes(self):
11385
    """Build hooks nodes.
11386

11387
    """
11388
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11389
    return (nl, nl)
11390

    
11391
  def CheckPrereq(self):
11392
    """Check prerequisites.
11393

11394
    This only checks the instance list against the existing names.
11395

11396
    """
11397
    # checking the new params on the primary/secondary nodes
11398

    
11399
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11400
    cluster = self.cluster = self.cfg.GetClusterInfo()
11401
    assert self.instance is not None, \
11402
      "Cannot retrieve locked instance %s" % self.op.instance_name
11403
    pnode = instance.primary_node
11404
    nodelist = list(instance.all_nodes)
11405
    pnode_info = self.cfg.GetNodeInfo(pnode)
11406
    self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11407

    
11408
    # OS change
11409
    if self.op.os_name and not self.op.force:
11410
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11411
                      self.op.force_variant)
11412
      instance_os = self.op.os_name
11413
    else:
11414
      instance_os = instance.os
11415

    
11416
    if self.op.disk_template:
11417
      if instance.disk_template == self.op.disk_template:
11418
        raise errors.OpPrereqError("Instance already has disk template %s" %
11419
                                   instance.disk_template, errors.ECODE_INVAL)
11420

    
11421
      if (instance.disk_template,
11422
          self.op.disk_template) not in self._DISK_CONVERSIONS:
11423
        raise errors.OpPrereqError("Unsupported disk template conversion from"
11424
                                   " %s to %s" % (instance.disk_template,
11425
                                                  self.op.disk_template),
11426
                                   errors.ECODE_INVAL)
11427
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
11428
                          msg="cannot change disk template")
11429
      if self.op.disk_template in constants.DTS_INT_MIRROR:
11430
        if self.op.remote_node == pnode:
11431
          raise errors.OpPrereqError("Given new secondary node %s is the same"
11432
                                     " as the primary node of the instance" %
11433
                                     self.op.remote_node, errors.ECODE_STATE)
11434
        _CheckNodeOnline(self, self.op.remote_node)
11435
        _CheckNodeNotDrained(self, self.op.remote_node)
11436
        # FIXME: here we assume that the old instance type is DT_PLAIN
11437
        assert instance.disk_template == constants.DT_PLAIN
11438
        disks = [{constants.IDISK_SIZE: d.size,
11439
                  constants.IDISK_VG: d.logical_id[0]}
11440
                 for d in instance.disks]
11441
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11442
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11443

    
11444
        snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11445
        if pnode_info.group != snode_info.group:
11446
          self.LogWarning("The primary and secondary nodes are in two"
11447
                          " different node groups; the disk parameters"
11448
                          " from the first disk's node group will be"
11449
                          " used")
11450

    
11451
    # hvparams processing
11452
    if self.op.hvparams:
11453
      hv_type = instance.hypervisor
11454
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11455
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11456
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11457

    
11458
      # local check
11459
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11460
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11461
      self.hv_proposed = self.hv_new = hv_new # the new actual values
11462
      self.hv_inst = i_hvdict # the new dict (without defaults)
11463
    else:
11464
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11465
                                              instance.hvparams)
11466
      self.hv_new = self.hv_inst = {}
11467

    
11468
    # beparams processing
11469
    if self.op.beparams:
11470
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11471
                                   use_none=True)
11472
      objects.UpgradeBeParams(i_bedict)
11473
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11474
      be_new = cluster.SimpleFillBE(i_bedict)
11475
      self.be_proposed = self.be_new = be_new # the new actual values
11476
      self.be_inst = i_bedict # the new dict (without defaults)
11477
    else:
11478
      self.be_new = self.be_inst = {}
11479
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11480
    be_old = cluster.FillBE(instance)
11481

    
11482
    # CPU param validation -- checking every time a paramtere is
11483
    # changed to cover all cases where either CPU mask or vcpus have
11484
    # changed
11485
    if (constants.BE_VCPUS in self.be_proposed and
11486
        constants.HV_CPU_MASK in self.hv_proposed):
11487
      cpu_list = \
11488
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11489
      # Verify mask is consistent with number of vCPUs. Can skip this
11490
      # test if only 1 entry in the CPU mask, which means same mask
11491
      # is applied to all vCPUs.
11492
      if (len(cpu_list) > 1 and
11493
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11494
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11495
                                   " CPU mask [%s]" %
11496
                                   (self.be_proposed[constants.BE_VCPUS],
11497
                                    self.hv_proposed[constants.HV_CPU_MASK]),
11498
                                   errors.ECODE_INVAL)
11499

    
11500
      # Only perform this test if a new CPU mask is given
11501
      if constants.HV_CPU_MASK in self.hv_new:
11502
        # Calculate the largest CPU number requested
11503
        max_requested_cpu = max(map(max, cpu_list))
11504
        # Check that all of the instance's nodes have enough physical CPUs to
11505
        # satisfy the requested CPU mask
11506
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11507
                                max_requested_cpu + 1, instance.hypervisor)
11508

    
11509
    # osparams processing
11510
    if self.op.osparams:
11511
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11512
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11513
      self.os_inst = i_osdict # the new dict (without defaults)
11514
    else:
11515
      self.os_inst = {}
11516

    
11517
    self.warn = []
11518

    
11519
    #TODO(dynmem): do the appropriate check involving MINMEM
11520
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11521
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11522
      mem_check_list = [pnode]
11523
      if be_new[constants.BE_AUTO_BALANCE]:
11524
        # either we changed auto_balance to yes or it was from before
11525
        mem_check_list.extend(instance.secondary_nodes)
11526
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11527
                                                  instance.hypervisor)
11528
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11529
                                         [instance.hypervisor])
11530
      pninfo = nodeinfo[pnode]
11531
      msg = pninfo.fail_msg
11532
      if msg:
11533
        # Assume the primary node is unreachable and go ahead
11534
        self.warn.append("Can't get info from primary node %s: %s" %
11535
                         (pnode, msg))
11536
      else:
11537
        (_, _, (pnhvinfo, )) = pninfo.payload
11538
        if not isinstance(pnhvinfo.get("memory_free", None), int):
11539
          self.warn.append("Node data from primary node %s doesn't contain"
11540
                           " free memory information" % pnode)
11541
        elif instance_info.fail_msg:
11542
          self.warn.append("Can't get instance runtime information: %s" %
11543
                          instance_info.fail_msg)
11544
        else:
11545
          if instance_info.payload:
11546
            current_mem = int(instance_info.payload["memory"])
11547
          else:
11548
            # Assume instance not running
11549
            # (there is a slight race condition here, but it's not very
11550
            # probable, and we have no other way to check)
11551
            # TODO: Describe race condition
11552
            current_mem = 0
11553
          #TODO(dynmem): do the appropriate check involving MINMEM
11554
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11555
                      pnhvinfo["memory_free"])
11556
          if miss_mem > 0:
11557
            raise errors.OpPrereqError("This change will prevent the instance"
11558
                                       " from starting, due to %d MB of memory"
11559
                                       " missing on its primary node" %
11560
                                       miss_mem,
11561
                                       errors.ECODE_NORES)
11562

    
11563
      if be_new[constants.BE_AUTO_BALANCE]:
11564
        for node, nres in nodeinfo.items():
11565
          if node not in instance.secondary_nodes:
11566
            continue
11567
          nres.Raise("Can't get info from secondary node %s" % node,
11568
                     prereq=True, ecode=errors.ECODE_STATE)
11569
          (_, _, (nhvinfo, )) = nres.payload
11570
          if not isinstance(nhvinfo.get("memory_free", None), int):
11571
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11572
                                       " memory information" % node,
11573
                                       errors.ECODE_STATE)
11574
          #TODO(dynmem): do the appropriate check involving MINMEM
11575
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11576
            raise errors.OpPrereqError("This change will prevent the instance"
11577
                                       " from failover to its secondary node"
11578
                                       " %s, due to not enough memory" % node,
11579
                                       errors.ECODE_STATE)
11580

    
11581
    # NIC processing
11582
    self.nic_pnew = {}
11583
    self.nic_pinst = {}
11584
    for nic_op, nic_dict in self.op.nics:
11585
      if nic_op == constants.DDM_REMOVE:
11586
        if not instance.nics:
11587
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11588
                                     errors.ECODE_INVAL)
11589
        continue
11590
      if nic_op != constants.DDM_ADD:
11591
        # an existing nic
11592
        if not instance.nics:
11593
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11594
                                     " no NICs" % nic_op,
11595
                                     errors.ECODE_INVAL)
11596
        if nic_op < 0 or nic_op >= len(instance.nics):
11597
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11598
                                     " are 0 to %d" %
11599
                                     (nic_op, len(instance.nics) - 1),
11600
                                     errors.ECODE_INVAL)
11601
        old_nic_params = instance.nics[nic_op].nicparams
11602
        old_nic_ip = instance.nics[nic_op].ip
11603
      else:
11604
        old_nic_params = {}
11605
        old_nic_ip = None
11606

    
11607
      update_params_dict = dict([(key, nic_dict[key])
11608
                                 for key in constants.NICS_PARAMETERS
11609
                                 if key in nic_dict])
11610

    
11611
      if "bridge" in nic_dict:
11612
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11613

    
11614
      new_nic_params = _GetUpdatedParams(old_nic_params,
11615
                                         update_params_dict)
11616
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11617
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11618
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11619
      self.nic_pinst[nic_op] = new_nic_params
11620
      self.nic_pnew[nic_op] = new_filled_nic_params
11621
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11622

    
11623
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11624
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11625
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11626
        if msg:
11627
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11628
          if self.op.force:
11629
            self.warn.append(msg)
11630
          else:
11631
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11632
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11633
        if constants.INIC_IP in nic_dict:
11634
          nic_ip = nic_dict[constants.INIC_IP]
11635
        else:
11636
          nic_ip = old_nic_ip
11637
        if nic_ip is None:
11638
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11639
                                     " on a routed nic", errors.ECODE_INVAL)
11640
      if constants.INIC_MAC in nic_dict:
11641
        nic_mac = nic_dict[constants.INIC_MAC]
11642
        if nic_mac is None:
11643
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11644
                                     errors.ECODE_INVAL)
11645
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11646
          # otherwise generate the mac
11647
          nic_dict[constants.INIC_MAC] = \
11648
            self.cfg.GenerateMAC(self.proc.GetECId())
11649
        else:
11650
          # or validate/reserve the current one
11651
          try:
11652
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11653
          except errors.ReservationError:
11654
            raise errors.OpPrereqError("MAC address %s already in use"
11655
                                       " in cluster" % nic_mac,
11656
                                       errors.ECODE_NOTUNIQUE)
11657

    
11658
    # DISK processing
11659
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11660
      raise errors.OpPrereqError("Disk operations not supported for"
11661
                                 " diskless instances",
11662
                                 errors.ECODE_INVAL)
11663
    for disk_op, _ in self.op.disks:
11664
      if disk_op == constants.DDM_REMOVE:
11665
        if len(instance.disks) == 1:
11666
          raise errors.OpPrereqError("Cannot remove the last disk of"
11667
                                     " an instance", errors.ECODE_INVAL)
11668
        _CheckInstanceState(self, instance, INSTANCE_DOWN,
11669
                            msg="cannot remove disks")
11670

    
11671
      if (disk_op == constants.DDM_ADD and
11672
          len(instance.disks) >= constants.MAX_DISKS):
11673
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11674
                                   " add more" % constants.MAX_DISKS,
11675
                                   errors.ECODE_STATE)
11676
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11677
        # an existing disk
11678
        if disk_op < 0 or disk_op >= len(instance.disks):
11679
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11680
                                     " are 0 to %d" %
11681
                                     (disk_op, len(instance.disks)),
11682
                                     errors.ECODE_INVAL)
11683

    
11684
    # disabling the instance
11685
    if self.op.offline_inst:
11686
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
11687
                          msg="cannot change instance state to offline")
11688

    
11689
    # enabling the instance
11690
    if self.op.online_inst:
11691
      _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11692
                          msg="cannot make instance go online")
11693

    
11694
  def _ConvertPlainToDrbd(self, feedback_fn):
11695
    """Converts an instance from plain to drbd.
11696

11697
    """
11698
    feedback_fn("Converting template to drbd")
11699
    instance = self.instance
11700
    pnode = instance.primary_node
11701
    snode = self.op.remote_node
11702

    
11703
    assert instance.disk_template == constants.DT_PLAIN
11704

    
11705
    # create a fake disk info for _GenerateDiskTemplate
11706
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11707
                  constants.IDISK_VG: d.logical_id[0]}
11708
                 for d in instance.disks]
11709
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11710
                                      instance.name, pnode, [snode],
11711
                                      disk_info, None, None, 0, feedback_fn,
11712
                                      self.diskparams)
11713
    info = _GetInstanceInfoText(instance)
11714
    feedback_fn("Creating aditional volumes...")
11715
    # first, create the missing data and meta devices
11716
    for disk in new_disks:
11717
      # unfortunately this is... not too nice
11718
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11719
                            info, True)
11720
      for child in disk.children:
11721
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11722
    # at this stage, all new LVs have been created, we can rename the
11723
    # old ones
11724
    feedback_fn("Renaming original volumes...")
11725
    rename_list = [(o, n.children[0].logical_id)
11726
                   for (o, n) in zip(instance.disks, new_disks)]
11727
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11728
    result.Raise("Failed to rename original LVs")
11729

    
11730
    feedback_fn("Initializing DRBD devices...")
11731
    # all child devices are in place, we can now create the DRBD devices
11732
    for disk in new_disks:
11733
      for node in [pnode, snode]:
11734
        f_create = node == pnode
11735
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11736

    
11737
    # at this point, the instance has been modified
11738
    instance.disk_template = constants.DT_DRBD8
11739
    instance.disks = new_disks
11740
    self.cfg.Update(instance, feedback_fn)
11741

    
11742
    # Release node locks while waiting for sync
11743
    _ReleaseLocks(self, locking.LEVEL_NODE)
11744

    
11745
    # disks are created, waiting for sync
11746
    disk_abort = not _WaitForSync(self, instance,
11747
                                  oneshot=not self.op.wait_for_sync)
11748
    if disk_abort:
11749
      raise errors.OpExecError("There are some degraded disks for"
11750
                               " this instance, please cleanup manually")
11751

    
11752
    # Node resource locks will be released by caller
11753

    
11754
  def _ConvertDrbdToPlain(self, feedback_fn):
11755
    """Converts an instance from drbd to plain.
11756

11757
    """
11758
    instance = self.instance
11759

    
11760
    assert len(instance.secondary_nodes) == 1
11761
    assert instance.disk_template == constants.DT_DRBD8
11762

    
11763
    pnode = instance.primary_node
11764
    snode = instance.secondary_nodes[0]
11765
    feedback_fn("Converting template to plain")
11766

    
11767
    old_disks = instance.disks
11768
    new_disks = [d.children[0] for d in old_disks]
11769

    
11770
    # copy over size and mode
11771
    for parent, child in zip(old_disks, new_disks):
11772
      child.size = parent.size
11773
      child.mode = parent.mode
11774

    
11775
    # update instance structure
11776
    instance.disks = new_disks
11777
    instance.disk_template = constants.DT_PLAIN
11778
    self.cfg.Update(instance, feedback_fn)
11779

    
11780
    # Release locks in case removing disks takes a while
11781
    _ReleaseLocks(self, locking.LEVEL_NODE)
11782

    
11783
    feedback_fn("Removing volumes on the secondary node...")
11784
    for disk in old_disks:
11785
      self.cfg.SetDiskID(disk, snode)
11786
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11787
      if msg:
11788
        self.LogWarning("Could not remove block device %s on node %s,"
11789
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11790

    
11791
    feedback_fn("Removing unneeded volumes on the primary node...")
11792
    for idx, disk in enumerate(old_disks):
11793
      meta = disk.children[1]
11794
      self.cfg.SetDiskID(meta, pnode)
11795
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11796
      if msg:
11797
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11798
                        " continuing anyway: %s", idx, pnode, msg)
11799

    
11800
    # this is a DRBD disk, return its port to the pool
11801
    for disk in old_disks:
11802
      tcp_port = disk.logical_id[2]
11803
      self.cfg.AddTcpUdpPort(tcp_port)
11804

    
11805
    # Node resource locks will be released by caller
11806

    
11807
  def Exec(self, feedback_fn):
11808
    """Modifies an instance.
11809

11810
    All parameters take effect only at the next restart of the instance.
11811

11812
    """
11813
    # Process here the warnings from CheckPrereq, as we don't have a
11814
    # feedback_fn there.
11815
    for warn in self.warn:
11816
      feedback_fn("WARNING: %s" % warn)
11817

    
11818
    assert ((self.op.disk_template is None) ^
11819
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11820
      "Not owning any node resource locks"
11821

    
11822
    result = []
11823
    instance = self.instance
11824
    # disk changes
11825
    for disk_op, disk_dict in self.op.disks:
11826
      if disk_op == constants.DDM_REMOVE:
11827
        # remove the last disk
11828
        device = instance.disks.pop()
11829
        device_idx = len(instance.disks)
11830
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11831
          self.cfg.SetDiskID(disk, node)
11832
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11833
          if msg:
11834
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11835
                            " continuing anyway", device_idx, node, msg)
11836
        result.append(("disk/%d" % device_idx, "remove"))
11837

    
11838
        # if this is a DRBD disk, return its port to the pool
11839
        if device.dev_type in constants.LDS_DRBD:
11840
          tcp_port = device.logical_id[2]
11841
          self.cfg.AddTcpUdpPort(tcp_port)
11842
      elif disk_op == constants.DDM_ADD:
11843
        # add a new disk
11844
        if instance.disk_template in (constants.DT_FILE,
11845
                                        constants.DT_SHARED_FILE):
11846
          file_driver, file_path = instance.disks[0].logical_id
11847
          file_path = os.path.dirname(file_path)
11848
        else:
11849
          file_driver = file_path = None
11850
        disk_idx_base = len(instance.disks)
11851
        new_disk = _GenerateDiskTemplate(self,
11852
                                         instance.disk_template,
11853
                                         instance.name, instance.primary_node,
11854
                                         instance.secondary_nodes,
11855
                                         [disk_dict],
11856
                                         file_path,
11857
                                         file_driver,
11858
                                         disk_idx_base,
11859
                                         feedback_fn,
11860
                                         self.diskparams)[0]
11861
        instance.disks.append(new_disk)
11862
        info = _GetInstanceInfoText(instance)
11863

    
11864
        logging.info("Creating volume %s for instance %s",
11865
                     new_disk.iv_name, instance.name)
11866
        # Note: this needs to be kept in sync with _CreateDisks
11867
        #HARDCODE
11868
        for node in instance.all_nodes:
11869
          f_create = node == instance.primary_node
11870
          try:
11871
            _CreateBlockDev(self, node, instance, new_disk,
11872
                            f_create, info, f_create)
11873
          except errors.OpExecError, err:
11874
            self.LogWarning("Failed to create volume %s (%s) on"
11875
                            " node %s: %s",
11876
                            new_disk.iv_name, new_disk, node, err)
11877
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11878
                       (new_disk.size, new_disk.mode)))
11879
      else:
11880
        # change a given disk
11881
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11882
        result.append(("disk.mode/%d" % disk_op,
11883
                       disk_dict[constants.IDISK_MODE]))
11884

    
11885
    if self.op.disk_template:
11886
      if __debug__:
11887
        check_nodes = set(instance.all_nodes)
11888
        if self.op.remote_node:
11889
          check_nodes.add(self.op.remote_node)
11890
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
11891
          owned = self.owned_locks(level)
11892
          assert not (check_nodes - owned), \
11893
            ("Not owning the correct locks, owning %r, expected at least %r" %
11894
             (owned, check_nodes))
11895

    
11896
      r_shut = _ShutdownInstanceDisks(self, instance)
11897
      if not r_shut:
11898
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11899
                                 " proceed with disk template conversion")
11900
      mode = (instance.disk_template, self.op.disk_template)
11901
      try:
11902
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11903
      except:
11904
        self.cfg.ReleaseDRBDMinors(instance.name)
11905
        raise
11906
      result.append(("disk_template", self.op.disk_template))
11907

    
11908
      assert instance.disk_template == self.op.disk_template, \
11909
        ("Expected disk template '%s', found '%s'" %
11910
         (self.op.disk_template, instance.disk_template))
11911

    
11912
    # Release node and resource locks if there are any (they might already have
11913
    # been released during disk conversion)
11914
    _ReleaseLocks(self, locking.LEVEL_NODE)
11915
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11916

    
11917
    # NIC changes
11918
    for nic_op, nic_dict in self.op.nics:
11919
      if nic_op == constants.DDM_REMOVE:
11920
        # remove the last nic
11921
        del instance.nics[-1]
11922
        result.append(("nic.%d" % len(instance.nics), "remove"))
11923
      elif nic_op == constants.DDM_ADD:
11924
        # mac and bridge should be set, by now
11925
        mac = nic_dict[constants.INIC_MAC]
11926
        ip = nic_dict.get(constants.INIC_IP, None)
11927
        nicparams = self.nic_pinst[constants.DDM_ADD]
11928
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11929
        instance.nics.append(new_nic)
11930
        result.append(("nic.%d" % (len(instance.nics) - 1),
11931
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11932
                       (new_nic.mac, new_nic.ip,
11933
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11934
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11935
                       )))
11936
      else:
11937
        for key in (constants.INIC_MAC, constants.INIC_IP):
11938
          if key in nic_dict:
11939
            setattr(instance.nics[nic_op], key, nic_dict[key])
11940
        if nic_op in self.nic_pinst:
11941
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11942
        for key, val in nic_dict.iteritems():
11943
          result.append(("nic.%s/%d" % (key, nic_op), val))
11944

    
11945
    # hvparams changes
11946
    if self.op.hvparams:
11947
      instance.hvparams = self.hv_inst
11948
      for key, val in self.op.hvparams.iteritems():
11949
        result.append(("hv/%s" % key, val))
11950

    
11951
    # beparams changes
11952
    if self.op.beparams:
11953
      instance.beparams = self.be_inst
11954
      for key, val in self.op.beparams.iteritems():
11955
        result.append(("be/%s" % key, val))
11956

    
11957
    # OS change
11958
    if self.op.os_name:
11959
      instance.os = self.op.os_name
11960

    
11961
    # osparams changes
11962
    if self.op.osparams:
11963
      instance.osparams = self.os_inst
11964
      for key, val in self.op.osparams.iteritems():
11965
        result.append(("os/%s" % key, val))
11966

    
11967
    # online/offline instance
11968
    if self.op.online_inst:
11969
      self.cfg.MarkInstanceDown(instance.name)
11970
      result.append(("admin_state", constants.ADMINST_DOWN))
11971
    if self.op.offline_inst:
11972
      self.cfg.MarkInstanceOffline(instance.name)
11973
      result.append(("admin_state", constants.ADMINST_OFFLINE))
11974

    
11975
    self.cfg.Update(instance, feedback_fn)
11976

    
11977
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
11978
                self.owned_locks(locking.LEVEL_NODE)), \
11979
      "All node locks should have been released by now"
11980

    
11981
    return result
11982

    
11983
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUInstanceChangeGroup(LogicalUnit):
11990
  HPATH = "instance-change-group"
11991
  HTYPE = constants.HTYPE_INSTANCE
11992
  REQ_BGL = False
11993

    
11994
  def ExpandNames(self):
11995
    self.share_locks = _ShareAll()
11996
    self.needed_locks = {
11997
      locking.LEVEL_NODEGROUP: [],
11998
      locking.LEVEL_NODE: [],
11999
      }
12000

    
12001
    self._ExpandAndLockInstance()
12002

    
12003
    if self.op.target_groups:
12004
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12005
                                  self.op.target_groups)
12006
    else:
12007
      self.req_target_uuids = None
12008

    
12009
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12010

    
12011
  def DeclareLocks(self, level):
12012
    if level == locking.LEVEL_NODEGROUP:
12013
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12014

    
12015
      if self.req_target_uuids:
12016
        lock_groups = set(self.req_target_uuids)
12017

    
12018
        # Lock all groups used by instance optimistically; this requires going
12019
        # via the node before it's locked, requiring verification later on
12020
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12021
        lock_groups.update(instance_groups)
12022
      else:
12023
        # No target groups, need to lock all of them
12024
        lock_groups = locking.ALL_SET
12025

    
12026
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12027

    
12028
    elif level == locking.LEVEL_NODE:
12029
      if self.req_target_uuids:
12030
        # Lock all nodes used by instances
12031
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12032
        self._LockInstancesNodes()
12033

    
12034
        # Lock all nodes in all potential target groups
12035
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12036
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12037
        member_nodes = [node_name
12038
                        for group in lock_groups
12039
                        for node_name in self.cfg.GetNodeGroup(group).members]
12040
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12041
      else:
12042
        # Lock all nodes as all groups are potential targets
12043
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12044

    
12045
  def CheckPrereq(self):
12046
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12047
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12048
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12049

    
12050
    assert (self.req_target_uuids is None or
12051
            owned_groups.issuperset(self.req_target_uuids))
12052
    assert owned_instances == set([self.op.instance_name])
12053

    
12054
    # Get instance information
12055
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12056

    
12057
    # Check if node groups for locked instance are still correct
12058
    assert owned_nodes.issuperset(self.instance.all_nodes), \
12059
      ("Instance %s's nodes changed while we kept the lock" %
12060
       self.op.instance_name)
12061

    
12062
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12063
                                           owned_groups)
12064

    
12065
    if self.req_target_uuids:
12066
      # User requested specific target groups
12067
      self.target_uuids = self.req_target_uuids
12068
    else:
12069
      # All groups except those used by the instance are potential targets
12070
      self.target_uuids = owned_groups - inst_groups
12071

    
12072
    conflicting_groups = self.target_uuids & inst_groups
12073
    if conflicting_groups:
12074
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12075
                                 " used by the instance '%s'" %
12076
                                 (utils.CommaJoin(conflicting_groups),
12077
                                  self.op.instance_name),
12078
                                 errors.ECODE_INVAL)
12079

    
12080
    if not self.target_uuids:
12081
      raise errors.OpPrereqError("There are no possible target groups",
12082
                                 errors.ECODE_INVAL)
12083

    
12084
  def BuildHooksEnv(self):
12085
    """Build hooks env.
12086

12087
    """
12088
    assert self.target_uuids
12089

    
12090
    env = {
12091
      "TARGET_GROUPS": " ".join(self.target_uuids),
12092
      }
12093

    
12094
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12095

    
12096
    return env
12097

    
12098
  def BuildHooksNodes(self):
12099
    """Build hooks nodes.
12100

12101
    """
12102
    mn = self.cfg.GetMasterNode()
12103
    return ([mn], [mn])
12104

    
12105
  def Exec(self, feedback_fn):
12106
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12107

    
12108
    assert instances == [self.op.instance_name], "Instance not locked"
12109

    
12110
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12111
                     instances=instances, target_groups=list(self.target_uuids))
12112

    
12113
    ial.Run(self.op.iallocator)
12114

    
12115
    if not ial.success:
12116
      raise errors.OpPrereqError("Can't compute solution for changing group of"
12117
                                 " instance '%s' using iallocator '%s': %s" %
12118
                                 (self.op.instance_name, self.op.iallocator,
12119
                                  ial.info),
12120
                                 errors.ECODE_NORES)
12121

    
12122
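    # Translate the iallocator result into job definitions (each one a list of
    # opcodes); they are handed back to the caller via ResultWithJobs below.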
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12123

    
12124
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
12125
                 " instance '%s'", len(jobs), self.op.instance_name)
12126

    
12127
    return ResultWithJobs(jobs)
12128

    
12129

    
12130
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
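    # Nodes that failed to return their export list are reported as False
    # instead of failing the whole query.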
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUBackupPrepare(NoHooksLU):
12167
  """Prepares an instance for an export and returns useful information.
12168

12169
  """
12170
  REQ_BGL = False
12171

    
12172
  def ExpandNames(self):
12173
    self._ExpandAndLockInstance()
12174

    
12175
  def CheckPrereq(self):
12176
    """Check prerequisites.
12177

12178
    """
12179
    instance_name = self.op.instance_name
12180

    
12181
    self.instance = self.cfg.GetInstanceInfo(instance_name)
12182
    assert self.instance is not None, \
12183
          "Cannot retrieve locked instance %s" % self.op.instance_name
12184
    _CheckNodeOnline(self, self.instance.primary_node)
12185

    
12186
    self._cds = _GetClusterDomainSecret()
12187

    
12188
  def Exec(self, feedback_fn):
12189
    """Prepares an instance for an export.
12190

12191
    """
12192
    instance = self.instance
12193

    
12194
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
12195
      salt = utils.GenerateSecret(8)
12196

    
12197
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12198
      result = self.rpc.call_x509_cert_create(instance.primary_node,
12199
                                              constants.RIE_CERT_VALIDITY)
12200
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
12201

    
12202
      (name, cert_pem) = result.payload
12203

    
12204
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12205
                                             cert_pem)
12206

    
12207
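      # The returned payload is intended for the remote import side: the
      # handshake and the HMAC over the key name are both derived from the
      # cluster domain secret, and the freshly generated CA certificate is
      # signed with that same secret so the destination can verify it.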
      return {
12208
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12209
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12210
                          salt),
12211
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12212
        }
12213

    
12214
    return None
12215

    
12216

    
12217
class LUBackupExport(LogicalUnit):
12218
  """Export an instance to an image in the cluster.
12219

12220
  """
12221
  HPATH = "instance-export"
12222
  HTYPE = constants.HTYPE_INSTANCE
12223
  REQ_BGL = False
12224

    
12225
  def CheckArguments(self):
12226
    """Check the arguments.
12227

12228
    """
12229
    self.x509_key_name = self.op.x509_key_name
12230
    self.dest_x509_ca_pem = self.op.destination_x509_ca
12231

    
12232
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
12233
      if not self.x509_key_name:
12234
        raise errors.OpPrereqError("Missing X509 key name for encryption",
12235
                                   errors.ECODE_INVAL)
12236

    
12237
      if not self.dest_x509_ca_pem:
12238
        raise errors.OpPrereqError("Missing destination X509 CA",
12239
                                   errors.ECODE_INVAL)
12240

    
12241
  def ExpandNames(self):
12242
    self._ExpandAndLockInstance()
12243

    
12244
    # Lock all nodes for local exports
12245
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12246
      # FIXME: lock only instance primary and destination node
12247
      #
12248
      # Sad but true, for now we have to lock all nodes, as we don't know where
12249
      # the previous export might be, and in this LU we search for it and
12250
      # remove it from its current node. In the future we could fix this by:
12251
      #  - making a tasklet to search (share-lock all), then create the
12252
      #    new one, then one to remove, after
12253
      #  - removing the removal operation altogether
12254
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12255

    
12256
  def DeclareLocks(self, level):
12257
    """Last minute lock declaration."""
12258
    # All nodes are locked anyway, so nothing to do here.
12259

    
12260
  def BuildHooksEnv(self):
12261
    """Build hooks env.
12262

12263
    This will run on the master, primary node and target node.
12264

12265
    """
12266
    env = {
12267
      "EXPORT_MODE": self.op.mode,
12268
      "EXPORT_NODE": self.op.target_node,
12269
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12270
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12271
      # TODO: Generic function for boolean env variables
12272
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12273
      }
12274

    
12275
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12276

    
12277
    return env
12278

    
12279
  def BuildHooksNodes(self):
12280
    """Build hooks nodes.
12281

12282
    """
12283
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12284

    
12285
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12286
      nl.append(self.op.target_node)
12287

    
12288
    return (nl, nl)
12289

    
12290
  def CheckPrereq(self):
12291
    """Check prerequisites.
12292

12293
    This checks that the instance and node names are valid.
12294

12295
    """
12296
    instance_name = self.op.instance_name
12297

    
12298
    self.instance = self.cfg.GetInstanceInfo(instance_name)
12299
    assert self.instance is not None, \
12300
          "Cannot retrieve locked instance %s" % self.op.instance_name
12301
    _CheckNodeOnline(self, self.instance.primary_node)
12302

    
12303
    if (self.op.remove_instance and
12304
        self.instance.admin_state == constants.ADMINST_UP and
12305
        not self.op.shutdown):
12306
      raise errors.OpPrereqError("Can not remove instance without shutting it"
12307
                                 " down before")
12308

    
12309
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12310
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12311
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12312
      assert self.dst_node is not None
12313

    
12314
      _CheckNodeOnline(self, self.dst_node.name)
12315
      _CheckNodeNotDrained(self, self.dst_node.name)
12316

    
12317
      self._cds = None
12318
      self.dest_disk_info = None
12319
      self.dest_x509_ca = None
12320

    
12321
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12322
      self.dst_node = None
12323

    
12324
      if len(self.op.target_node) != len(self.instance.disks):
12325
        raise errors.OpPrereqError(("Received destination information for %s"
12326
                                    " disks, but instance %s has %s disks") %
12327
                                   (len(self.op.target_node), instance_name,
12328
                                    len(self.instance.disks)),
12329
                                   errors.ECODE_INVAL)
12330

    
12331
      cds = _GetClusterDomainSecret()
12332

    
12333
      # Check X509 key name
12334
      try:
12335
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12336
      except (TypeError, ValueError), err:
12337
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12338

    
12339
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12340
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12341
                                   errors.ECODE_INVAL)
12342

    
12343
      # Load and verify CA
12344
      try:
12345
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12346
      except OpenSSL.crypto.Error, err:
12347
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12348
                                   (err, ), errors.ECODE_INVAL)
12349

    
12350
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12351
      if errcode is not None:
12352
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12353
                                   (msg, ), errors.ECODE_INVAL)
12354

    
12355
      self.dest_x509_ca = cert
12356

    
12357
      # Verify target information
12358
      disk_info = []
12359
      for idx, disk_data in enumerate(self.op.target_node):
12360
        try:
12361
          (host, port, magic) = \
12362
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12363
        except errors.GenericError, err:
12364
          raise errors.OpPrereqError("Target info for disk %s: %s" %
12365
                                     (idx, err), errors.ECODE_INVAL)
12366

    
12367
        disk_info.append((host, port, magic))
12368

    
12369
      assert len(disk_info) == len(self.op.target_node)
12370
      self.dest_disk_info = disk_info
12371

    
12372
    else:
12373
      raise errors.ProgrammerError("Unhandled export mode %r" %
12374
                                   self.op.mode)
12375

    
12376
    # instance disk type verification
12377
    # TODO: Implement export support for file-based disks
12378
    for disk in self.instance.disks:
12379
      if disk.dev_type == constants.LD_FILE:
12380
        raise errors.OpPrereqError("Export not supported for instances with"
12381
                                   " file-based disks", errors.ECODE_INVAL)
12382

    
12383
  def _CleanupExports(self, feedback_fn):
12384
    """Removes exports of current instance from all other nodes.
12385

12386
    If an instance in a cluster with nodes A..D was exported to node C, its
12387
    exports will be removed from the nodes A, B and D.
12388

12389
    """
12390
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
12391

    
12392
    nodelist = self.cfg.GetNodeList()
12393
    nodelist.remove(self.dst_node.name)
12394

    
12395
    # on one-node clusters nodelist will be empty after the removal
12396
    # if we proceed the backup would be removed because OpBackupQuery
12397
    # substitutes an empty list with the full cluster node list.
12398
    iname = self.instance.name
12399
    if nodelist:
12400
      feedback_fn("Removing old exports for instance %s" % iname)
12401
      exportlist = self.rpc.call_export_list(nodelist)
12402
      for node in exportlist:
12403
        if exportlist[node].fail_msg:
12404
          continue
12405
        if iname in exportlist[node].payload:
12406
          msg = self.rpc.call_export_remove(node, iname).fail_msg
12407
          if msg:
12408
            self.LogWarning("Could not remove older export for instance %s"
12409
                            " on node %s: %s", iname, node, msg)
12410

    
12411
  def Exec(self, feedback_fn):
12412
    """Export an instance to an image in the cluster.
12413

12414
    """
12415
    assert self.op.mode in constants.EXPORT_MODES
12416

    
12417
    instance = self.instance
12418
    src_node = instance.primary_node
12419

    
12420
    if self.op.shutdown:
12421
      # shutdown the instance, but not the disks
12422
      feedback_fn("Shutting down instance %s" % instance.name)
12423
      result = self.rpc.call_instance_shutdown(src_node, instance,
12424
                                               self.op.shutdown_timeout)
12425
      # TODO: Maybe ignore failures if ignore_remove_failures is set
12426
      result.Raise("Could not shutdown instance %s on"
12427
                   " node %s" % (instance.name, src_node))
12428

    
12429
    # set the disks ID correctly since call_instance_start needs the
12430
    # correct drbd minor to create the symlinks
12431
    for disk in instance.disks:
12432
      self.cfg.SetDiskID(disk, src_node)
12433

    
12434
    activate_disks = (instance.admin_state != constants.ADMINST_UP)
12435

    
12436
    if activate_disks:
12437
      # Activate the instance disks if we're exporting a stopped instance
12438
      feedback_fn("Activating disks for %s" % instance.name)
12439
      _StartInstanceDisks(self, instance, None)
12440

    
12441
    try:
12442
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12443
                                                     instance)
12444

    
12445
      helper.CreateSnapshots()
12446
      try:
12447
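        # If the instance was only shut down for the export (and is not being
        # removed), restart it as soon as the snapshots exist; the actual data
        # transfer below then runs from the snapshots while the instance is
        # back online.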
        if (self.op.shutdown and
12448
            instance.admin_state == constants.ADMINST_UP and
12449
            not self.op.remove_instance):
12450
          assert not activate_disks
12451
          feedback_fn("Starting instance %s" % instance.name)
12452
          result = self.rpc.call_instance_start(src_node,
12453
                                                (instance, None, None), False)
12454
          msg = result.fail_msg
12455
          if msg:
12456
            feedback_fn("Failed to start instance: %s" % msg)
12457
            _ShutdownInstanceDisks(self, instance)
12458
            raise errors.OpExecError("Could not start instance: %s" % msg)
12459

    
12460
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
12461
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12462
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12463
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
12464
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12465

    
12466
          (key_name, _, _) = self.x509_key_name
12467

    
12468
          dest_ca_pem = \
12469
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12470
                                            self.dest_x509_ca)
12471

    
12472
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12473
                                                     key_name, dest_ca_pem,
12474
                                                     timeouts)
12475
      finally:
12476
        helper.Cleanup()
12477

    
12478
      # Check for backwards compatibility
12479
      assert len(dresults) == len(instance.disks)
12480
      assert compat.all(isinstance(i, bool) for i in dresults), \
12481
             "Not all results are boolean: %r" % dresults
12482

    
12483
    finally:
12484
      if activate_disks:
12485
        feedback_fn("Deactivating disks for %s" % instance.name)
12486
        _ShutdownInstanceDisks(self, instance)
12487

    
12488
    if not (compat.all(dresults) and fin_resu):
12489
      failures = []
12490
      if not fin_resu:
12491
        failures.append("export finalization")
12492
      if not compat.all(dresults):
12493
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12494
                               if not dsk)
12495
        failures.append("disk export: disk(s) %s" % fdsk)
12496

    
12497
      raise errors.OpExecError("Export failed, errors in %s" %
12498
                               utils.CommaJoin(failures))
12499

    
12500
    # At this point, the export was successful, we can cleanup/finish
12501

    
12502
    # Remove instance if requested
12503
    if self.op.remove_instance:
12504
      feedback_fn("Removing instance %s" % instance.name)
12505
      _RemoveInstance(self, feedback_fn, instance,
12506
                      self.op.ignore_remove_failures)
12507

    
12508
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12509
      self._CleanupExports(feedback_fn)
12510

    
12511
    return fin_resu, dresults
12512

    
12513

    
12514
class LUBackupRemove(NoHooksLU):
12515
  """Remove exports related to the named instance.
12516

12517
  """
12518
  REQ_BGL = False
12519

    
12520
  def ExpandNames(self):
12521
    self.needed_locks = {}
12522
    # We need all nodes to be locked in order for RemoveExport to work, but we
12523
    # don't need to lock the instance itself, as nothing will happen to it (and
12524
    # we can remove exports also for a removed instance)
12525
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12526

    
12527
  def Exec(self, feedback_fn):
12528
    """Remove any export.
12529

12530
    """
12531
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12532
    # If the instance was not found we'll try with the name that was passed in.
12533
    # This will only work if it was an FQDN, though.
12534
    fqdn_warn = False
12535
    if not instance_name:
12536
      fqdn_warn = True
12537
      instance_name = self.op.instance_name
12538

    
12539
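    # The export may live on any node, so query the export list on every
    # locked node and remove it wherever it is found.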
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12540
    exportlist = self.rpc.call_export_list(locked_nodes)
12541
    found = False
12542
    for node in exportlist:
12543
      msg = exportlist[node].fail_msg
12544
      if msg:
12545
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12546
        continue
12547
      if instance_name in exportlist[node].payload:
12548
        found = True
12549
        result = self.rpc.call_export_remove(node, instance_name)
12550
        msg = result.fail_msg
12551
        if msg:
12552
          logging.error("Could not remove export for instance %s"
12553
                        " on node %s: %s", instance_name, node, msg)
12554

    
12555
    if fqdn_warn and not found:
12556
      feedback_fn("Export not found. If trying to remove an export belonging"
12557
                  " to a deleted instance please use its Fully Qualified"
12558
                  " Domain Name.")
12559

    
12560

    
12561
class LUGroupAdd(LogicalUnit):
12562
  """Logical unit for creating node groups.
12563

12564
  """
12565
  HPATH = "group-add"
12566
  HTYPE = constants.HTYPE_GROUP
12567
  REQ_BGL = False
12568

    
12569
  def ExpandNames(self):
12570
    # We need the new group's UUID here so that we can create and acquire the
12571
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12572
    # that it should not check whether the UUID exists in the configuration.
12573
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12574
    self.needed_locks = {}
12575
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12576

    
12577
  def CheckPrereq(self):
12578
    """Check prerequisites.
12579

12580
    This checks that the given group name is not an existing node group
12581
    already.
12582

12583
    """
12584
    try:
12585
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12586
    except errors.OpPrereqError:
12587
      pass
12588
    else:
12589
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12590
                                 " node group (UUID: %s)" %
12591
                                 (self.op.group_name, existing_uuid),
12592
                                 errors.ECODE_EXISTS)
12593

    
12594
    if self.op.ndparams:
12595
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12596

    
12597
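    # Disk parameters: templates not mentioned by the user get an empty dict
    # and every provided dict is type-checked; without any override the new
    # group simply inherits the cluster-wide disk parameters.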
    if self.op.diskparams:
12598
      for templ in constants.DISK_TEMPLATES:
12599
        if templ not in self.op.diskparams:
12600
          self.op.diskparams[templ] = {}
12601
        utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12602
    else:
12603
      self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12604

    
12605
  def BuildHooksEnv(self):
12606
    """Build hooks env.
12607

12608
    """
12609
    return {
12610
      "GROUP_NAME": self.op.group_name,
12611
      }
12612

    
12613
  def BuildHooksNodes(self):
12614
    """Build hooks nodes.
12615

12616
    """
12617
    mn = self.cfg.GetMasterNode()
12618
    return ([mn], [mn])
12619

    
12620
  def Exec(self, feedback_fn):
12621
    """Add the node group to the cluster.
12622

12623
    """
12624
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12625
                                  uuid=self.group_uuid,
12626
                                  alloc_policy=self.op.alloc_policy,
12627
                                  ndparams=self.op.ndparams,
12628
                                  diskparams=self.op.diskparams)
12629

    
12630
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12631
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12632

    
12633

    
12634
class LUGroupAssignNodes(NoHooksLU):
12635
  """Logical unit for assigning nodes to groups.
12636

12637
  """
12638
  REQ_BGL = False
12639

    
12640
  def ExpandNames(self):
12641
    # These raise errors.OpPrereqError on their own:
12642
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12643
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12644

    
12645
    # We want to lock all the affected nodes and groups. We have readily
12646
    # available the list of nodes, and the *destination* group. To gather the
12647
    # list of "source" groups, we need to fetch node information later on.
12648
    self.needed_locks = {
12649
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12650
      locking.LEVEL_NODE: self.op.nodes,
12651
      }
12652

    
12653
  def DeclareLocks(self, level):
12654
    if level == locking.LEVEL_NODEGROUP:
12655
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12656

    
12657
      # Try to get all affected nodes' groups without having the group or node
12658
      # lock yet. Needs verification later in the code flow.
12659
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12660

    
12661
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12662

    
12663
  def CheckPrereq(self):
12664
    """Check prerequisites.
12665

12666
    """
12667
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12668
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12669
            frozenset(self.op.nodes))
12670

    
12671
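    # The node groups were looked up before the group locks were acquired, so
    # verify that the set of owned group locks still matches the groups the
    # nodes currently belong to.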
    expected_locks = (set([self.group_uuid]) |
12672
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12673
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12674
    if actual_locks != expected_locks:
12675
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12676
                               " current groups are '%s', used to be '%s'" %
12677
                               (utils.CommaJoin(expected_locks),
12678
                                utils.CommaJoin(actual_locks)))
12679

    
12680
    self.node_data = self.cfg.GetAllNodesInfo()
12681
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12682
    instance_data = self.cfg.GetAllInstancesInfo()
12683

    
12684
    if self.group is None:
12685
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12686
                               (self.op.group_name, self.group_uuid))
12687

    
12688
    (new_splits, previous_splits) = \
12689
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12690
                                             for node in self.op.nodes],
12691
                                            self.node_data, instance_data)
12692

    
12693
    if new_splits:
12694
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12695

    
12696
      if not self.op.force:
12697
        raise errors.OpExecError("The following instances get split by this"
12698
                                 " change and --force was not given: %s" %
12699
                                 fmt_new_splits)
12700
      else:
12701
        self.LogWarning("This operation will split the following instances: %s",
12702
                        fmt_new_splits)
12703

    
12704
        if previous_splits:
12705
          self.LogWarning("In addition, these already-split instances continue"
12706
                          " to be split across groups: %s",
12707
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12708

    
12709
  def Exec(self, feedback_fn):
12710
    """Assign nodes to a new group.
12711

12712
    """
12713
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12714

    
12715
    self.cfg.AssignGroupNodes(mods)
12716

    
12717
  @staticmethod
12718
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12719
    """Check for split instances after a node assignment.
12720

12721
    This method considers a series of node assignments as an atomic operation,
12722
    and returns information about split instances after applying the set of
12723
    changes.
12724

12725
    In particular, it returns information about newly split instances, and
12726
    instances that were already split, and remain so after the change.
12727

12728
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12729
    considered.
12730

12731
    @type changes: list of (node_name, new_group_uuid) pairs.
12732
    @param changes: list of node assignments to consider.
12733
    @param node_data: a dict with data for all nodes
12734
    @param instance_data: a dict with all instances to consider
12735
    @rtype: a two-tuple
12736
    @return: a list of instances that were previously okay and end up split as
      a consequence of this change, and a list of instances that were
      previously split and that this change does not fix.
12739

12740
    """
12741
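    # Worked example (hypothetical nodes and groups): a mirrored instance on
    # nodes n1 (group A) and n2 (group B) is already split; it stays in the
    # "previously split" list unless this change reunites its nodes. An
    # instance on n1 and n3 (both group A) becomes newly split if only n3 is
    # moved to group B.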
    changed_nodes = dict((node, group) for node, group in changes
12742
                         if node_data[node].group != group)
12743

    
12744
    all_split_instances = set()
12745
    previously_split_instances = set()
12746

    
12747
    def InstanceNodes(instance):
12748
      return [instance.primary_node] + list(instance.secondary_nodes)
12749

    
12750
    for inst in instance_data.values():
12751
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12752
        continue
12753

    
12754
      instance_nodes = InstanceNodes(inst)
12755

    
12756
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12757
        previously_split_instances.add(inst.name)
12758

    
12759
      if len(set(changed_nodes.get(node, node_data[node].group)
12760
                 for node in instance_nodes)) > 1:
12761
        all_split_instances.add(inst.name)
12762

    
12763
    return (list(all_split_instances - previously_split_instances),
12764
            list(previously_split_instances & all_split_instances))
12765

    
12766

    
12767
class _GroupQuery(_QueryBase):
12768
  FIELDS = query.GROUP_FIELDS
12769

    
12770
  def ExpandNames(self, lu):
12771
    lu.needed_locks = {}
12772

    
12773
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12774
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12775

    
12776
    if not self.names:
12777
      self.wanted = [name_to_uuid[name]
12778
                     for name in utils.NiceSort(name_to_uuid.keys())]
12779
    else:
12780
      # Accept names to be either names or UUIDs.
12781
      missing = []
12782
      self.wanted = []
12783
      all_uuid = frozenset(self._all_groups.keys())
12784

    
12785
      for name in self.names:
12786
        if name in all_uuid:
12787
          self.wanted.append(name)
12788
        elif name in name_to_uuid:
12789
          self.wanted.append(name_to_uuid[name])
12790
        else:
12791
          missing.append(name)
12792

    
12793
      if missing:
12794
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12795
                                   utils.CommaJoin(missing),
12796
                                   errors.ECODE_NOENT)
12797

    
12798
  def DeclareLocks(self, lu, level):
12799
    pass
12800

    
12801
  def _GetQueryData(self, lu):
12802
    """Computes the list of node groups and their attributes.
12803

12804
    """
12805
    do_nodes = query.GQ_NODE in self.requested_data
12806
    do_instances = query.GQ_INST in self.requested_data
12807

    
12808
    group_to_nodes = None
12809
    group_to_instances = None
12810

    
12811
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12812
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12813
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12814
    # instance->node. Hence, we will need to process nodes even if we only need
12815
    # instance information.
12816
    if do_nodes or do_instances:
12817
      all_nodes = lu.cfg.GetAllNodesInfo()
12818
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12819
      node_to_group = {}
12820

    
12821
      for node in all_nodes.values():
12822
        if node.group in group_to_nodes:
12823
          group_to_nodes[node.group].append(node.name)
12824
          node_to_group[node.name] = node.group
12825

    
12826
      if do_instances:
12827
        all_instances = lu.cfg.GetAllInstancesInfo()
12828
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12829

    
12830
        for instance in all_instances.values():
12831
          node = instance.primary_node
12832
          if node in node_to_group:
12833
            group_to_instances[node_to_group[node]].append(instance.name)
12834

    
12835
        if not do_nodes:
12836
          # Do not pass on node information if it was not requested.
12837
          group_to_nodes = None
12838

    
12839
    return query.GroupQueryData([self._all_groups[uuid]
12840
                                 for uuid in self.wanted],
12841
                                group_to_nodes, group_to_instances)
12842

    
12843

    
12844
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
12865
  """Modifies the parameters of a node group.
12866

12867
  """
12868
  HPATH = "group-modify"
12869
  HTYPE = constants.HTYPE_GROUP
12870
  REQ_BGL = False
12871

    
12872
  def CheckArguments(self):
12873
    all_changes = [
12874
      self.op.ndparams,
12875
      self.op.diskparams,
12876
      self.op.alloc_policy,
12877
      ]
12878

    
12879
    if all_changes.count(None) == len(all_changes):
12880
      raise errors.OpPrereqError("Please pass at least one modification",
12881
                                 errors.ECODE_INVAL)
12882

    
12883
  def ExpandNames(self):
12884
    # This raises errors.OpPrereqError on its own:
12885
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12886

    
12887
    self.needed_locks = {
12888
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12889
      }
12890

    
12891
  def CheckPrereq(self):
12892
    """Check prerequisites.
12893

12894
    """
12895
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12896

    
12897
    if self.group is None:
12898
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12899
                               (self.op.group_name, self.group_uuid))
12900

    
12901
    if self.op.ndparams:
12902
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12903
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12904
      self.new_ndparams = new_ndparams
12905

    
12906
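    # Disk parameters are merged per template: values from the opcode override
    # the group's current settings, templates that were not given default to
    # an empty override, and the merged result is type-checked before use.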
    if self.op.diskparams:
12907
      self.new_diskparams = dict()
12908
      for templ in constants.DISK_TEMPLATES:
12909
        if templ not in self.op.diskparams:
12910
          self.op.diskparams[templ] = {}
12911
        new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
12912
                                             self.op.diskparams[templ])
12913
        utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
12914
        self.new_diskparams[templ] = new_templ_params
12915

    
12916
  def BuildHooksEnv(self):
12917
    """Build hooks env.
12918

12919
    """
12920
    return {
12921
      "GROUP_NAME": self.op.group_name,
12922
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12923
      }
12924

    
12925
  def BuildHooksNodes(self):
12926
    """Build hooks nodes.
12927

12928
    """
12929
    mn = self.cfg.GetMasterNode()
12930
    return ([mn], [mn])
12931

    
12932
  def Exec(self, feedback_fn):
12933
    """Modifies the node group.
12934

12935
    """
12936
    result = []
12937

    
12938
    if self.op.ndparams:
12939
      self.group.ndparams = self.new_ndparams
12940
      result.append(("ndparams", str(self.group.ndparams)))
12941

    
12942
    if self.op.diskparams:
12943
      self.group.diskparams = self.new_diskparams
12944
      result.append(("diskparams", str(self.group.diskparams)))
12945

    
12946
    if self.op.alloc_policy:
12947
      self.group.alloc_policy = self.op.alloc_policy
12948

    
12949
    self.cfg.Update(self.group, feedback_fn)
12950
    return result
12951

    
12952

    
12953
class LUGroupRemove(LogicalUnit):
12954
  HPATH = "group-remove"
12955
  HTYPE = constants.HTYPE_GROUP
12956
  REQ_BGL = False
12957

    
12958
  def ExpandNames(self):
12959
    # This raises errors.OpPrereqError on its own:
12960
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12961
    self.needed_locks = {
12962
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12963
      }
12964

    
12965
  def CheckPrereq(self):
12966
    """Check prerequisites.
12967

12968
    This checks that the given group name exists as a node group, that is
12969
    empty (i.e., contains no nodes), and that is not the last group of the
12970
    cluster.
12971

12972
    """
12973
    # Verify that the group is empty.
12974
    group_nodes = [node.name
12975
                   for node in self.cfg.GetAllNodesInfo().values()
12976
                   if node.group == self.group_uuid]
12977

    
12978
    if group_nodes:
12979
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12980
                                 " nodes: %s" %
12981
                                 (self.op.group_name,
12982
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12983
                                 errors.ECODE_STATE)
12984

    
12985
    # Verify the cluster would not be left group-less.
12986
    if len(self.cfg.GetNodeGroupList()) == 1:
12987
      raise errors.OpPrereqError("Group '%s' is the only group,"
12988
                                 " cannot be removed" %
12989
                                 self.op.group_name,
12990
                                 errors.ECODE_STATE)
12991

    
12992
  def BuildHooksEnv(self):
12993
    """Build hooks env.
12994

12995
    """
12996
    return {
12997
      "GROUP_NAME": self.op.group_name,
12998
      }
12999

    
13000
  def BuildHooksNodes(self):
13001
    """Build hooks nodes.
13002

13003
    """
13004
    mn = self.cfg.GetMasterNode()
13005
    return ([mn], [mn])
13006

    
13007
  def Exec(self, feedback_fn):
13008
    """Remove the node group.
13009

13010
    """
13011
    try:
13012
      self.cfg.RemoveNodeGroup(self.group_uuid)
13013
    except errors.ConfigurationError:
13014
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13015
                               (self.op.group_name, self.group_uuid))
13016

    
13017
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13018

    
13019

    
13020
class LUGroupRename(LogicalUnit):
13021
  HPATH = "group-rename"
13022
  HTYPE = constants.HTYPE_GROUP
13023
  REQ_BGL = False
13024

    
13025
  def ExpandNames(self):
13026
    # This raises errors.OpPrereqError on its own:
13027
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13028

    
13029
    self.needed_locks = {
13030
      locking.LEVEL_NODEGROUP: [self.group_uuid],
13031
      }
13032

    
13033
  def CheckPrereq(self):
13034
    """Check prerequisites.
13035

13036
    Ensures requested new name is not yet used.
13037

13038
    """
13039
    try:
13040
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13041
    except errors.OpPrereqError:
13042
      pass
13043
    else:
13044
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13045
                                 " node group (UUID: %s)" %
13046
                                 (self.op.new_name, new_name_uuid),
13047
                                 errors.ECODE_EXISTS)
13048

    
13049
  def BuildHooksEnv(self):
13050
    """Build hooks env.
13051

13052
    """
13053
    return {
13054
      "OLD_NAME": self.op.group_name,
13055
      "NEW_NAME": self.op.new_name,
13056
      }
13057

    
13058
  def BuildHooksNodes(self):
13059
    """Build hooks nodes.
13060

13061
    """
13062
    mn = self.cfg.GetMasterNode()
13063

    
13064
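    # Run the hooks on the master plus every node of the renamed group; the
    # master is dropped from the full node list first so it is not listed
    # twice if it is itself a member of the group.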
    all_nodes = self.cfg.GetAllNodesInfo()
13065
    all_nodes.pop(mn, None)
13066

    
13067
    run_nodes = [mn]
13068
    run_nodes.extend(node.name for node in all_nodes.values()
13069
                     if node.group == self.group_uuid)
13070

    
13071
    return (run_nodes, run_nodes)
13072

    
13073
  def Exec(self, feedback_fn):
13074
    """Rename the node group.
13075

13076
    """
13077
    group = self.cfg.GetNodeGroup(self.group_uuid)
13078

    
13079
    if group is None:
13080
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13081
                               (self.op.group_name, self.group_uuid))
13082

    
13083
    group.name = self.op.new_name
13084
    self.cfg.Update(group, feedback_fn)
13085

    
13086
    return self.op.new_name
13087

    
13088

    
13089
class LUGroupEvacuate(LogicalUnit):
13090
  HPATH = "group-evacuate"
13091
  HTYPE = constants.HTYPE_GROUP
13092
  REQ_BGL = False
13093

    
13094
  def ExpandNames(self):
13095
    # This raises errors.OpPrereqError on its own:
13096
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13097

    
13098
    if self.op.target_groups:
13099
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13100
                                  self.op.target_groups)
13101
    else:
13102
      self.req_target_uuids = []
13103

    
13104
    if self.group_uuid in self.req_target_uuids:
13105
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13106
                                 " as a target group (targets are %s)" %
13107
                                 (self.group_uuid,
13108
                                  utils.CommaJoin(self.req_target_uuids)),
13109
                                 errors.ECODE_INVAL)
13110

    
13111
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13112

    
13113
    self.share_locks = _ShareAll()
13114
    self.needed_locks = {
13115
      locking.LEVEL_INSTANCE: [],
13116
      locking.LEVEL_NODEGROUP: [],
13117
      locking.LEVEL_NODE: [],
13118
      }
13119

    
13120
  def DeclareLocks(self, level):
13121
    if level == locking.LEVEL_INSTANCE:
13122
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
13123

    
13124
      # Lock instances optimistically, needs verification once node and group
13125
      # locks have been acquired
13126
      self.needed_locks[locking.LEVEL_INSTANCE] = \
13127
        self.cfg.GetNodeGroupInstances(self.group_uuid)
13128

    
13129
    elif level == locking.LEVEL_NODEGROUP:
13130
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13131

    
13132
      if self.req_target_uuids:
13133
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
13134

    
13135
        # Lock all groups used by instances optimistically; this requires going
13136
        # via the node before it's locked, requiring verification later on
13137
        lock_groups.update(group_uuid
13138
                           for instance_name in
13139
                             self.owned_locks(locking.LEVEL_INSTANCE)
13140
                           for group_uuid in
13141
                             self.cfg.GetInstanceNodeGroups(instance_name))
13142
      else:
13143
        # No target groups, need to lock all of them
13144
        lock_groups = locking.ALL_SET
13145

    
13146
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13147

    
13148
    elif level == locking.LEVEL_NODE:
13149
      # This will only lock the nodes in the group to be evacuated which
13150
      # contain actual instances
13151
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13152
      self._LockInstancesNodes()
13153

    
13154
      # Lock all nodes in group to be evacuated and target groups
13155
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13156
      assert self.group_uuid in owned_groups
13157
      member_nodes = [node_name
13158
                      for group in owned_groups
13159
                      for node_name in self.cfg.GetNodeGroup(group).members]
13160
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13161

    
13162
  def CheckPrereq(self):
13163
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13164
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13165
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13166

    
13167
    assert owned_groups.issuperset(self.req_target_uuids)
13168
    assert self.group_uuid in owned_groups
13169

    
13170
    # Check if locked instances are still correct
13171
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13172

    
13173
    # Get instance information
13174
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13175

    
13176
    # Check if node groups for locked instances are still correct
13177
    for instance_name in owned_instances:
13178
      inst = self.instances[instance_name]
13179
      assert owned_nodes.issuperset(inst.all_nodes), \
13180
        "Instance %s's nodes changed while we kept the lock" % instance_name
13181

    
13182
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13183
                                             owned_groups)
13184

    
13185
      assert self.group_uuid in inst_groups, \
13186
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13187

    
13188
    if self.req_target_uuids:
13189
      # User requested specific target groups
13190
      self.target_uuids = self.req_target_uuids
13191
    else:
13192
      # All groups except the one to be evacuated are potential targets
13193
      self.target_uuids = [group_uuid for group_uuid in owned_groups
13194
                           if group_uuid != self.group_uuid]
13195

    
13196
      if not self.target_uuids:
13197
        raise errors.OpPrereqError("There are no possible target groups",
13198
                                   errors.ECODE_INVAL)
13199

    
13200
  def BuildHooksEnv(self):
13201
    """Build hooks env.
13202

13203
    """
13204
    return {
13205
      "GROUP_NAME": self.op.group_name,
13206
      "TARGET_GROUPS": " ".join(self.target_uuids),
13207
      }
13208

    
13209
  def BuildHooksNodes(self):
13210
    """Build hooks nodes.
13211

13212
    """
13213
    mn = self.cfg.GetMasterNode()
13214

    
13215
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13216

    
13217
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13218

    
13219
    return (run_nodes, run_nodes)
13220

    
13221
  def Exec(self, feedback_fn):
13222
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13223

    
13224
    assert self.group_uuid not in self.target_uuids
13225

    
13226
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13227
                     instances=instances, target_groups=self.target_uuids)
13228

    
13229
    ial.Run(self.op.iallocator)
13230

    
13231
    if not ial.success:
13232
      raise errors.OpPrereqError("Can't compute group evacuation using"
13233
                                 " iallocator '%s': %s" %
13234
                                 (self.op.iallocator, ial.info),
13235
                                 errors.ECODE_NORES)
13236

    
13237
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13238

    
13239
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13240
                 len(jobs), self.op.group_name)
13241

    
13242
    return ResultWithJobs(jobs)
13243

    
13244

    
13245
class TagsLU(NoHooksLU): # pylint: disable=W0223
13246
  """Generic tags LU.
13247

13248
  This is an abstract class which is the parent of all the other tags LUs.
13249

13250
  """
13251
  def ExpandNames(self):
13252
    self.group_uuid = None
13253
    self.needed_locks = {}
13254
    if self.op.kind == constants.TAG_NODE:
13255
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13256
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
13257
    elif self.op.kind == constants.TAG_INSTANCE:
13258
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13259
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13260
    elif self.op.kind == constants.TAG_NODEGROUP:
13261
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13262

    
13263
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13264
    # not possible to acquire the BGL based on opcode parameters)
13265

    
13266
  def CheckPrereq(self):
13267
    """Check prerequisites.
13268

13269
    """
13270
    if self.op.kind == constants.TAG_CLUSTER:
13271
      self.target = self.cfg.GetClusterInfo()
13272
    elif self.op.kind == constants.TAG_NODE:
13273
      self.target = self.cfg.GetNodeInfo(self.op.name)
13274
    elif self.op.kind == constants.TAG_INSTANCE:
13275
      self.target = self.cfg.GetInstanceInfo(self.op.name)
13276
    elif self.op.kind == constants.TAG_NODEGROUP:
13277
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
13278
    else:
13279
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13280
                                 str(self.op.kind), errors.ECODE_INVAL)
13281

    
13282

    
13283
class LUTagsGet(TagsLU):
13284
  """Returns the tags of a given object.
13285

13286
  """
13287
  REQ_BGL = False
13288

    
13289
  def ExpandNames(self):
13290
    TagsLU.ExpandNames(self)
13291

    
13292
    # Share locks as this is only a read operation
13293
    self.share_locks = _ShareAll()
13294

    
13295
  def Exec(self, feedback_fn):
13296
    """Returns the tag list.
13297

13298
    """
13299
    return list(self.target.GetTags())
13300

    
13301

    
13302
class LUTagsSearch(NoHooksLU):
13303
  """Searches the tags for a given pattern.
13304

13305
  """
13306
  REQ_BGL = False
13307

    
13308
  def ExpandNames(self):
13309
    self.needed_locks = {}
13310

    
13311
  def CheckPrereq(self):
13312
    """Check prerequisites.
13313

13314
    This checks the pattern passed for validity by compiling it.
13315

13316
    """
13317
    try:
13318
      self.re = re.compile(self.op.pattern)
13319
    except re.error, err:
13320
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13321
                                 (self.op.pattern, err), errors.ECODE_INVAL)
13322

    
13323
  def Exec(self, feedback_fn):
13324
    """Returns the tag list.
13325

13326
    """
13327
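    # Collect every taggable object (cluster, instances, nodes, node groups)
    # together with a pseudo-path and report each tag that matches the pattern
    # compiled in CheckPrereq.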
    cfg = self.cfg
13328
    tgts = [("/cluster", cfg.GetClusterInfo())]
13329
    ilist = cfg.GetAllInstancesInfo().values()
13330
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13331
    nlist = cfg.GetAllNodesInfo().values()
13332
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13333
    tgts.extend(("/nodegroup/%s" % n.name, n)
13334
                for n in cfg.GetAllNodeGroupsInfo().values())
13335
    results = []
13336
    for path, target in tgts:
13337
      for tag in target.GetTags():
13338
        if self.re.search(tag):
13339
          results.append((path, tag))
13340
    return results
13341

    
13342

    
13343
class LUTagsSet(TagsLU):
13344
  """Sets a tag on a given object.
13345

13346
  """
13347
  REQ_BGL = False
13348

    
13349
  def CheckPrereq(self):
13350
    """Check prerequisites.
13351

13352
    This checks the type and length of the tag name and value.
13353

13354
    """
13355
    TagsLU.CheckPrereq(self)
13356
    for tag in self.op.tags:
13357
      objects.TaggableObject.ValidateTag(tag)
13358

    
13359
  def Exec(self, feedback_fn):
13360
    """Sets the tag.
13361

13362
    """
13363
    try:
13364
      for tag in self.op.tags:
13365
        self.target.AddTag(tag)
13366
    except errors.TagError, err:
13367
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
13368
    self.cfg.Update(self.target, feedback_fn)
13369

    
13370

    
13371
class LUTagsDel(TagsLU):
13372
  """Delete a list of tags from a given object.
13373

13374
  """
13375
  REQ_BGL = False
13376

    
13377
  def CheckPrereq(self):
13378
    """Check prerequisites.
13379

13380
    This checks that we have the given tag.
13381

13382
    """
13383
    TagsLU.CheckPrereq(self)
13384
    for tag in self.op.tags:
13385
      objects.TaggableObject.ValidateTag(tag)
13386
    del_tags = frozenset(self.op.tags)
13387
    cur_tags = self.target.GetTags()
13388

    
13389
    diff_tags = del_tags - cur_tags
13390
    if diff_tags:
13391
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
13392
      raise errors.OpPrereqError("Tag(s) %s not found" %
13393
                                 (utils.CommaJoin(diff_names), ),
13394
                                 errors.ECODE_NOENT)
13395

    
13396
  def Exec(self, feedback_fn):
13397
    """Remove the tag from the object.
13398

13399
    """
13400
    for tag in self.op.tags:
13401
      self.target.RemoveTag(tag)
13402
    self.cfg.Update(self.target, feedback_fn)
13403

    
13404

    
13405
class LUTestDelay(NoHooksLU):
13406
  """Sleep for a specified amount of time.
13407

13408
  This LU sleeps on the master and/or nodes for a specified amount of
13409
  time.
13410

13411
  """
13412
  REQ_BGL = False
13413

    
13414
  def ExpandNames(self):
13415
    """Expand names and set required locks.
13416

13417
    This expands the node list, if any.
13418

13419
    """
13420
    self.needed_locks = {}
13421
    if self.op.on_nodes:
13422
      # _GetWantedNodes can be used here, but is not always appropriate to use
13423
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13424
      # more information.
13425
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13426
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13427

    
13428
  def _TestDelay(self):
13429
    """Do the actual sleep.
13430

13431
    """
13432
    if self.op.on_master:
13433
      if not utils.TestDelay(self.op.duration):
13434
        raise errors.OpExecError("Error during master delay test")
13435
    if self.op.on_nodes:
13436
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13437
      for node, node_result in result.items():
13438
        node_result.Raise("Failure during rpc call to node %s" % node)
13439

    
13440
  def Exec(self, feedback_fn):
13441
    """Execute the test delay opcode, with the wanted repetitions.
13442

13443
    """
13444
    if self.op.repeat == 0:
13445
      self._TestDelay()
13446
    else:
13447
      top_value = self.op.repeat - 1
13448
      for i in range(self.op.repeat):
13449
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13450
        self._TestDelay()
13451
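
# Example sketch (never called): how a delay test for the LU above would be
# requested via an opcode.  The duration/repeat values are arbitrary; with
# repeat=0 the sleep runs exactly once, with repeat=N it runs N times.
def _ExampleTestDelayOpcode():
  """Builds a sample OpTestDelay opcode (for illustration only).

  """
  return opcodes.OpTestDelay(duration=2.5, on_master=True, on_nodes=[],
                             repeat=3)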

    
13452

    
13453
class LUTestJqueue(NoHooksLU):
13454
  """Utility LU to test some aspects of the job queue.
13455

13456
  """
13457
  REQ_BGL = False
13458

    
13459
  # Must be lower than default timeout for WaitForJobChange to see whether it
13460
  # notices changed jobs
13461
  _CLIENT_CONNECT_TIMEOUT = 20.0
13462
  _CLIENT_CONFIRM_TIMEOUT = 60.0
13463

    
13464
  @classmethod
13465
  def _NotifyUsingSocket(cls, cb, errcls):
13466
    """Opens a Unix socket and waits for another program to connect.
13467

13468
    @type cb: callable
13469
    @param cb: Callback to send socket name to client
13470
    @type errcls: class
13471
    @param errcls: Exception class to use for errors
13472

13473
    """
13474
    # Using a temporary directory as there's no easy way to create temporary
13475
    # sockets without writing a custom loop around tempfile.mktemp and
13476
    # socket.bind
13477
    tmpdir = tempfile.mkdtemp()
13478
    try:
13479
      tmpsock = utils.PathJoin(tmpdir, "sock")
13480

    
13481
      logging.debug("Creating temporary socket at %s", tmpsock)
13482
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13483
      try:
13484
        sock.bind(tmpsock)
13485
        sock.listen(1)
13486

    
13487
        # Send details to client
13488
        cb(tmpsock)
13489

    
13490
        # Wait for client to connect before continuing
13491
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13492
        try:
13493
          (conn, _) = sock.accept()
13494
        except socket.error, err:
13495
          raise errcls("Client didn't connect in time (%s)" % err)
13496
      finally:
13497
        sock.close()
13498
    finally:
13499
      # Remove as soon as client is connected
13500
      shutil.rmtree(tmpdir)
13501

    
13502
    # Wait for client to close
13503
    try:
13504
      try:
13505
        # pylint: disable=E1101
13506
        # Instance of '_socketobject' has no ... member
13507
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13508
        conn.recv(1)
13509
      except socket.error, err:
13510
        raise errcls("Client failed to confirm notification (%s)" % err)
13511
    finally:
13512
      conn.close()
13513
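
  # Sketch of the client half of the protocol used by _NotifyUsingSocket above
  # (for illustration only; nothing in this module calls it).  The client must
  # connect within _CLIENT_CONNECT_TIMEOUT and then send at least one byte
  # within _CLIENT_CONFIRM_TIMEOUT to confirm the notification.
  @staticmethod
  def _ExampleNotificationClient(sockname):
    """Connects to C{sockname} and confirms the notification (sketch only).

    """
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
      sock.connect(sockname)
      sock.sendall("x")
    finally:
      sock.close()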

    
13514
  def _SendNotification(self, test, arg, sockname):
13515
    """Sends a notification to the client.
13516

13517
    @type test: string
13518
    @param test: Test name
13519
    @param arg: Test argument (depends on test)
13520
    @type sockname: string
13521
    @param sockname: Socket path
13522

13523
    """
13524
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13525

    
13526
  def _Notify(self, prereq, test, arg):
13527
    """Notifies the client of a test.
13528

13529
    @type prereq: bool
13530
    @param prereq: Whether this is a prereq-phase test
13531
    @type test: string
13532
    @param test: Test name
13533
    @param arg: Test argument (depends on test)
13534

13535
    """
13536
    if prereq:
13537
      errcls = errors.OpPrereqError
13538
    else:
13539
      errcls = errors.OpExecError
13540

    
13541
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13542
                                                  test, arg),
13543
                                   errcls)
13544

    
13545
  def CheckArguments(self):
13546
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13547
    self.expandnames_calls = 0
13548

    
13549
  def ExpandNames(self):
13550
    checkargs_calls = getattr(self, "checkargs_calls", 0)
13551
    if checkargs_calls < 1:
13552
      raise errors.ProgrammerError("CheckArguments was not called")
13553

    
13554
    self.expandnames_calls += 1
13555

    
13556
    if self.op.notify_waitlock:
13557
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
13558

    
13559
    self.LogInfo("Expanding names")
13560

    
13561
    # Get lock on master node (just to get a lock, not for a particular reason)
13562
    self.needed_locks = {
13563
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13564
      }
13565

    
13566
  def Exec(self, feedback_fn):
13567
    if self.expandnames_calls < 1:
13568
      raise errors.ProgrammerError("ExpandNames was not called")
13569

    
13570
    if self.op.notify_exec:
13571
      self._Notify(False, constants.JQT_EXEC, None)
13572

    
13573
    self.LogInfo("Executing")
13574

    
13575
    if self.op.log_messages:
13576
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13577
      for idx, msg in enumerate(self.op.log_messages):
13578
        self.LogInfo("Sending log message %s", idx + 1)
13579
        feedback_fn(constants.JQT_MSGPREFIX + msg)
13580
        # Report how many test messages have been sent
13581
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13582

    
13583
    if self.op.fail:
13584
      raise errors.OpExecError("Opcode failure was requested")
13585

    
13586
    return True
13587

    
13588

    
13589
class IAllocator(object):
13590
  """IAllocator framework.
13591

13592
  An IAllocator instance has four sets of attributes:
13593
    - cfg that is needed to query the cluster
13594
    - input data (all keys listed in _MODE_DATA for the mode are required)
13595
    - four buffer attributes (in_text, out_text, in_data, out_data), which
13596
      hold the input to the external script in text and data structure form,
13597
      and the output from it, again in two formats
13598
    - the result variables from the script (success, info, result) for
13599
      easy usage
13600

13601
  """
13602
  # pylint: disable=R0902
13603
  # lots of instance attributes
13604

    
13605
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
13606
    self.cfg = cfg
13607
    self.rpc = rpc_runner
13608
    # init buffer variables
13609
    self.in_text = self.out_text = self.in_data = self.out_data = None
13610
    # init all input fields so that pylint is happy
13611
    self.mode = mode
13612
    self.memory = self.disks = self.disk_template = None
13613
    self.os = self.tags = self.nics = self.vcpus = None
13614
    self.hypervisor = None
13615
    self.relocate_from = None
13616
    self.name = None
13617
    self.instances = None
13618
    self.evac_mode = None
13619
    self.target_groups = []
13620
    # computed fields
13621
    self.required_nodes = None
13622
    # init result fields
13623
    self.success = self.info = self.result = None
13624

    
13625
    try:
13626
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13627
    except KeyError:
13628
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13629
                                   " IAllocator" % self.mode)
13630

    
13631
    keyset = [n for (n, _) in keydata]
13632

    
13633
    for key in kwargs:
13634
      if key not in keyset:
13635
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
13636
                                     " IAllocator" % key)
13637
      setattr(self, key, kwargs[key])
13638

    
13639
    for key in keyset:
13640
      if key not in kwargs:
13641
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13642
                                     " IAllocator" % key)
13643
    self._BuildInputData(compat.partial(fn, self), keydata)
13644

    
13645
  def _ComputeClusterData(self):
13646
    """Compute the generic allocator input data.
13647

13648
    This is the data that is independent of the actual operation.
13649

13650
    """
13651
    cfg = self.cfg
13652
    cluster_info = cfg.GetClusterInfo()
13653
    # cluster data
13654
    data = {
13655
      "version": constants.IALLOCATOR_VERSION,
13656
      "cluster_name": cfg.GetClusterName(),
13657
      "cluster_tags": list(cluster_info.GetTags()),
13658
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13659
      # we don't have job IDs
13660
      }
13661
    ninfo = cfg.GetAllNodesInfo()
13662
    iinfo = cfg.GetAllInstancesInfo().values()
13663
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13664

    
13665
    # node data
13666
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
13667

    
13668
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13669
      hypervisor_name = self.hypervisor
13670
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13671
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13672
    else:
13673
      hypervisor_name = cluster_info.primary_hypervisor
13674

    
13675
    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
13676
                                        [hypervisor_name])
13677
    node_iinfo = \
13678
      self.rpc.call_all_instances_info(node_list,
13679
                                       cluster_info.enabled_hypervisors)
13680

    
13681
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13682

    
13683
    config_ndata = self._ComputeBasicNodeData(ninfo)
13684
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13685
                                                 i_list, config_ndata)
13686
    assert len(data["nodes"]) == len(ninfo), \
13687
        "Incomplete node data computed"
13688

    
13689
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13690

    
13691
    self.in_data = data
13692
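
  # Sketch only: the rough top-level shape of C{self.in_data} right after
  # L{_ComputeClusterData}; every value below is invented and the per-node and
  # per-instance dictionaries are left empty (they are filled by the helper
  # methods that follow).
  @staticmethod
  def _ExampleClusterData():
    """Returns an example of the cluster data layout (illustration only).

    """
    return {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": "cluster.example.com",
      "cluster_tags": [],
      "enabled_hypervisors": ["xen-pvm"],
      "nodegroups": {
        "f4e06e0f-0000-0000-0000-000000000000": {
          "name": "default",
          "alloc_policy": constants.ALLOC_POLICY_PREFERRED,
          },
        },
      "nodes": {},      # see _ComputeBasicNodeData/_ComputeDynamicNodeData
      "instances": {},  # see _ComputeInstanceData
      }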

    
13693
  @staticmethod
13694
  def _ComputeNodeGroupData(cfg):
13695
    """Compute node groups data.
13696

13697
    """
13698
    ng = dict((guuid, {
13699
      "name": gdata.name,
13700
      "alloc_policy": gdata.alloc_policy,
13701
      })
13702
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13703

    
13704
    return ng
13705

    
13706
  @staticmethod
13707
  def _ComputeBasicNodeData(node_cfg):
13708
    """Compute global node data.
13709

13710
    @rtype: dict
13711
    @return: a dict mapping node names to config-derived attribute dicts
13712

13713
    """
13714
    # fill in static (config-based) values
13715
    node_results = dict((ninfo.name, {
13716
      "tags": list(ninfo.GetTags()),
13717
      "primary_ip": ninfo.primary_ip,
13718
      "secondary_ip": ninfo.secondary_ip,
13719
      "offline": ninfo.offline,
13720
      "drained": ninfo.drained,
13721
      "master_candidate": ninfo.master_candidate,
13722
      "group": ninfo.group,
13723
      "master_capable": ninfo.master_capable,
13724
      "vm_capable": ninfo.vm_capable,
13725
      })
13726
      for ninfo in node_cfg.values())
13727

    
13728
    return node_results
13729

    
13730
  @staticmethod
13731
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13732
                              node_results):
13733
    """Compute global node data.
13734

13735
    @param node_results: the basic node structures as filled from the config
13736

13737
    """
13738
    # TODO(dynmem): compute the right data on MAX and MIN memory
13739
    # make a copy of the current dict
13740
    node_results = dict(node_results)
13741
    for nname, nresult in node_data.items():
13742
      assert nname in node_results, "Missing basic data for node %s" % nname
13743
      ninfo = node_cfg[nname]
13744

    
13745
      if not (ninfo.offline or ninfo.drained):
13746
        nresult.Raise("Can't get data for node %s" % nname)
13747
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13748
                                nname)
13749
        remote_info = _MakeLegacyNodeInfo(nresult.payload)
13750

    
13751
        for attr in ["memory_total", "memory_free", "memory_dom0",
13752
                     "vg_size", "vg_free", "cpu_total"]:
13753
          if attr not in remote_info:
13754
            raise errors.OpExecError("Node '%s' didn't return attribute"
13755
                                     " '%s'" % (nname, attr))
13756
          if not isinstance(remote_info[attr], int):
13757
            raise errors.OpExecError("Node '%s' returned invalid value"
13758
                                     " for '%s': %s" %
13759
                                     (nname, attr, remote_info[attr]))
13760
        # compute memory used by primary instances
13761
        i_p_mem = i_p_up_mem = 0
13762
        for iinfo, beinfo in i_list:
13763
          if iinfo.primary_node == nname:
13764
            i_p_mem += beinfo[constants.BE_MAXMEM]
13765
            if iinfo.name not in node_iinfo[nname].payload:
13766
              i_used_mem = 0
13767
            else:
13768
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13769
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
13770
            remote_info["memory_free"] -= max(0, i_mem_diff)
13771

    
13772
            if iinfo.admin_state == constants.ADMINST_UP:
13773
              i_p_up_mem += beinfo[constants.BE_MAXMEM]
13774

    
13775
        # assemble the dynamic node data exported to the allocator
13776
        pnr_dyn = {
13777
          "total_memory": remote_info["memory_total"],
13778
          "reserved_memory": remote_info["memory_dom0"],
13779
          "free_memory": remote_info["memory_free"],
13780
          "total_disk": remote_info["vg_size"],
13781
          "free_disk": remote_info["vg_free"],
13782
          "total_cpus": remote_info["cpu_total"],
13783
          "i_pri_memory": i_p_mem,
13784
          "i_pri_up_memory": i_p_up_mem,
13785
          }
13786
        pnr_dyn.update(node_results[nname])
13787
        node_results[nname] = pnr_dyn
13788

    
13789
    return node_results
13790
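
  # Worked example for the free-memory correction in _ComputeDynamicNodeData
  # (all numbers are arbitrary): a node reports memory_free=4096 MiB and hosts
  # one primary instance with BE_MAXMEM=1024 MiB that currently uses 512 MiB.
  # The instance may still grow by 1024 - 512 = 512 MiB, so the allocator is
  # given free_memory = 4096 - 512 = 3584, while i_pri_memory is 1024 (and
  # i_pri_up_memory is 1024 as well if the instance is administratively up).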

    
13791
  @staticmethod
13792
  def _ComputeInstanceData(cluster_info, i_list):
13793
    """Compute global instance data.
13794

13795
    """
13796
    instance_data = {}
13797
    for iinfo, beinfo in i_list:
13798
      nic_data = []
13799
      for nic in iinfo.nics:
13800
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13801
        nic_dict = {
13802
          "mac": nic.mac,
13803
          "ip": nic.ip,
13804
          "mode": filled_params[constants.NIC_MODE],
13805
          "link": filled_params[constants.NIC_LINK],
13806
          }
13807
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13808
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13809
        nic_data.append(nic_dict)
13810
      pir = {
13811
        "tags": list(iinfo.GetTags()),
13812
        "admin_state": iinfo.admin_state,
13813
        "vcpus": beinfo[constants.BE_VCPUS],
13814
        "memory": beinfo[constants.BE_MAXMEM],
13815
        "os": iinfo.os,
13816
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13817
        "nics": nic_data,
13818
        "disks": [{constants.IDISK_SIZE: dsk.size,
13819
                   constants.IDISK_MODE: dsk.mode}
13820
                  for dsk in iinfo.disks],
13821
        "disk_template": iinfo.disk_template,
13822
        "hypervisor": iinfo.hypervisor,
13823
        }
13824
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13825
                                                 pir["disks"])
13826
      instance_data[iinfo.name] = pir
13827

    
13828
    return instance_data
13829

    
13830
  def _AddNewInstance(self):
13831
    """Add new instance data to allocator structure.
13832

13833
    This in combination with _ComputeClusterData will create the
13834
    correct structure needed as input for the allocator.
13835

13836
    The checks for the completeness of the opcode must have already been
13837
    done.
13838

13839
    """
13840
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13841

    
13842
    if self.disk_template in constants.DTS_INT_MIRROR:
13843
      self.required_nodes = 2
13844
    else:
13845
      self.required_nodes = 1
13846

    
13847
    request = {
13848
      "name": self.name,
13849
      "disk_template": self.disk_template,
13850
      "tags": self.tags,
13851
      "os": self.os,
13852
      "vcpus": self.vcpus,
13853
      "memory": self.memory,
13854
      "disks": self.disks,
13855
      "disk_space_total": disk_space,
13856
      "nics": self.nics,
13857
      "required_nodes": self.required_nodes,
13858
      "hypervisor": self.hypervisor,
13859
      }
13860

    
13861
    return request
13862
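
  # Sketch only (not referenced anywhere): a request dictionary as built by
  # L{_AddNewInstance} for a DRBD-based allocation; all values are invented.
  # For DRBD the disk space total includes the metadata overhead, hence the
  # DRBD_META_SIZE term.
  @staticmethod
  def _ExampleAllocRequest():
    """Returns an example allocation request (illustration only).

    """
    return {
      "name": "instance1.example.com",
      "disk_template": constants.DT_DRBD8,
      "tags": [],
      "os": "debootstrap+default",
      "vcpus": 1,
      "memory": 1024,
      "disks": [{constants.IDISK_SIZE: 10240,
                 constants.IDISK_MODE: constants.DISK_RDWR}],
      "disk_space_total": 10240 + DRBD_META_SIZE,
      "nics": [{"mac": constants.VALUE_AUTO, "ip": None,
                "mode": constants.NIC_MODE_BRIDGED, "link": "xen-br0"}],
      "required_nodes": 2,
      "hypervisor": "xen-pvm",
      }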

    
13863
  def _AddRelocateInstance(self):
13864
    """Add relocate instance data to allocator structure.
13865

13866
    This in combination with _ComputeClusterData will create the
13867
    correct structure needed as input for the allocator.
13868

13869
    The checks for the completeness of the opcode must have already been
13870
    done.
13871

13872
    """
13873
    instance = self.cfg.GetInstanceInfo(self.name)
13874
    if instance is None:
13875
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13876
                                   " IAllocator" % self.name)
13877

    
13878
    if instance.disk_template not in constants.DTS_MIRRORED:
13879
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13880
                                 errors.ECODE_INVAL)
13881

    
13882
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13883
        len(instance.secondary_nodes) != 1:
13884
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13885
                                 errors.ECODE_STATE)
13886

    
13887
    self.required_nodes = 1
13888
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13889
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13890

    
13891
    request = {
13892
      "name": self.name,
13893
      "disk_space_total": disk_space,
13894
      "required_nodes": self.required_nodes,
13895
      "relocate_from": self.relocate_from,
13896
      }
13897
    return request
13898

    
13899
  def _AddNodeEvacuate(self):
13900
    """Get data for node-evacuate requests.
13901

13902
    """
13903
    return {
13904
      "instances": self.instances,
13905
      "evac_mode": self.evac_mode,
13906
      }
13907

    
13908
  def _AddChangeGroup(self):
13909
    """Get data for node-evacuate requests.
13910

13911
    """
13912
    return {
13913
      "instances": self.instances,
13914
      "target_groups": self.target_groups,
13915
      }
13916

    
13917
  def _BuildInputData(self, fn, keydata):
13918
    """Build input data structures.
13919

13920
    """
13921
    self._ComputeClusterData()
13922

    
13923
    request = fn()
13924
    request["type"] = self.mode
13925
    for keyname, keytype in keydata:
13926
      if keyname not in request:
13927
        raise errors.ProgrammerError("Request parameter %s is missing" %
13928
                                     keyname)
13929
      val = request[keyname]
13930
      if not keytype(val):
13931
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13932
                                     " validation, value %s, expected"
13933
                                     " type %s" % (keyname, val, keytype))
13934
    self.in_data["request"] = request
13935

    
13936
    self.in_text = serializer.Dump(self.in_data)
13937

    
13938
  _STRING_LIST = ht.TListOf(ht.TString)
13939
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13940
     # pylint: disable=E1101
13941
     # Class '...' has no 'OP_ID' member
13942
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13943
                          opcodes.OpInstanceMigrate.OP_ID,
13944
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13945
     })))
13946

    
13947
  _NEVAC_MOVED = \
13948
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13949
                       ht.TItems([ht.TNonEmptyString,
13950
                                  ht.TNonEmptyString,
13951
                                  ht.TListOf(ht.TNonEmptyString),
13952
                                 ])))
13953
  _NEVAC_FAILED = \
13954
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13955
                       ht.TItems([ht.TNonEmptyString,
13956
                                  ht.TMaybeString,
13957
                                 ])))
13958
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13959
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13960

    
13961
  _MODE_DATA = {
13962
    constants.IALLOCATOR_MODE_ALLOC:
13963
      (_AddNewInstance,
13964
       [
13965
        ("name", ht.TString),
13966
        ("memory", ht.TInt),
13967
        ("disks", ht.TListOf(ht.TDict)),
13968
        ("disk_template", ht.TString),
13969
        ("os", ht.TString),
13970
        ("tags", _STRING_LIST),
13971
        ("nics", ht.TListOf(ht.TDict)),
13972
        ("vcpus", ht.TInt),
13973
        ("hypervisor", ht.TString),
13974
        ], ht.TList),
13975
    constants.IALLOCATOR_MODE_RELOC:
13976
      (_AddRelocateInstance,
13977
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13978
       ht.TList),
13979
    constants.IALLOCATOR_MODE_NODE_EVAC:
13980
      (_AddNodeEvacuate, [
13981
        ("instances", _STRING_LIST),
13982
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13983
        ], _NEVAC_RESULT),
13984
    constants.IALLOCATOR_MODE_CHG_GROUP:
13985
      (_AddChangeGroup, [
13986
        ("instances", _STRING_LIST),
13987
        ("target_groups", _STRING_LIST),
13988
        ], _NEVAC_RESULT),
13989
    }
13990

    
13991
  def Run(self, name, validate=True, call_fn=None):
13992
    """Run an instance allocator and return the results.
13993

13994
    """
13995
    if call_fn is None:
13996
      call_fn = self.rpc.call_iallocator_runner
13997

    
13998
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13999
    result.Raise("Failure while running the iallocator script")
14000

    
14001
    self.out_text = result.payload
14002
    if validate:
14003
      self._ValidateResult()
14004

    
14005
  def _ValidateResult(self):
14006
    """Process the allocator results.
14007

14008
    This will process the allocator output and, if successful, save it in
14009
    self.out_data and the other result attributes.
14010

14011
    """
14012
    try:
14013
      rdict = serializer.Load(self.out_text)
14014
    except Exception, err:
14015
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14016

    
14017
    if not isinstance(rdict, dict):
14018
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
14019

    
14020
    # TODO: remove backwards compatibility in later versions
14021
    if "nodes" in rdict and "result" not in rdict:
14022
      rdict["result"] = rdict["nodes"]
14023
      del rdict["nodes"]
14024

    
14025
    for key in "success", "info", "result":
14026
      if key not in rdict:
14027
        raise errors.OpExecError("Can't parse iallocator results:"
14028
                                 " missing key '%s'" % key)
14029
      setattr(self, key, rdict[key])
14030

    
14031
    if not self._result_check(self.result):
14032
      raise errors.OpExecError("Iallocator returned invalid result,"
14033
                               " expected %s, got %s" %
14034
                               (self._result_check, self.result))
14036

    
14037
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
14038
      assert self.relocate_from is not None
14039
      assert self.required_nodes == 1
14040

    
14041
      node2group = dict((name, ndata["group"])
14042
                        for (name, ndata) in self.in_data["nodes"].items())
14043

    
14044
      fn = compat.partial(self._NodesToGroups, node2group,
14045
                          self.in_data["nodegroups"])
14046

    
14047
      instance = self.cfg.GetInstanceInfo(self.name)
14048
      request_groups = fn(self.relocate_from + [instance.primary_node])
14049
      result_groups = fn(rdict["result"] + [instance.primary_node])
14050

    
14051
      if self.success and not set(result_groups).issubset(request_groups):
14052
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14053
                                 " differ from original groups (%s)" %
14054
                                 (utils.CommaJoin(result_groups),
14055
                                  utils.CommaJoin(request_groups)))
14056

    
14057
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14058
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14059

    
14060
    self.out_data = rdict
14061
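
  # Sketch of a well-formed reply for the 'allocate' mode, as accepted by
  # _ValidateResult above (node names invented).  The serialized out_text
  # would decode to something like:
  #
  #   {"success": true,
  #    "info": "allocation successful",
  #    "result": ["node2.example.com", "node3.example.com"]}
  #
  # Replies using the legacy "nodes" key instead of "result" are still
  # accepted and converted.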

    
14062
  @staticmethod
14063
  def _NodesToGroups(node2group, groups, nodes):
14064
    """Returns a list of unique group names for a list of nodes.
14065

14066
    @type node2group: dict
14067
    @param node2group: Map from node name to group UUID
14068
    @type groups: dict
14069
    @param groups: Group information
14070
    @type nodes: list
14071
    @param nodes: Node names
14072

14073
    """
14074
    result = set()
14075

    
14076
    for node in nodes:
14077
      try:
14078
        group_uuid = node2group[node]
14079
      except KeyError:
14080
        # Ignore unknown node
14081
        pass
14082
      else:
14083
        try:
14084
          group = groups[group_uuid]
14085
        except KeyError:
14086
          # Can't find group, let's use UUID
14087
          group_name = group_uuid
14088
        else:
14089
          group_name = group["name"]
14090

    
14091
        result.add(group_name)
14092

    
14093
    return sorted(result)
14094
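
  # Example use of _NodesToGroups (all names invented):
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b", "node9": "uuid-x"}
  #   groups = {"uuid-a": {"name": "default"}, "uuid-b": {"name": "rack2"}}
  #   _NodesToGroups(node2group, groups, ["node1", "node2", "node7", "node9"])
  # returns ["default", "rack2", "uuid-x"]: the unknown "node7" is skipped and
  # the group missing from C{groups} falls back to its UUID.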

    
14095

    
14096
class LUTestAllocator(NoHooksLU):
14097
  """Run allocator tests.
14098

14099
  This LU runs the allocator tests.
14100

14101
  """
14102
  def CheckPrereq(self):
14103
    """Check prerequisites.
14104

14105
    This checks the opcode parameters depending on the test direction and mode.
14106

14107
    """
14108
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14109
      for attr in ["memory", "disks", "disk_template",
14110
                   "os", "tags", "nics", "vcpus"]:
14111
        if not hasattr(self.op, attr):
14112
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14113
                                     attr, errors.ECODE_INVAL)
14114
      iname = self.cfg.ExpandInstanceName(self.op.name)
14115
      if iname is not None:
14116
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14117
                                   iname, errors.ECODE_EXISTS)
14118
      if not isinstance(self.op.nics, list):
14119
        raise errors.OpPrereqError("Invalid parameter 'nics'",
14120
                                   errors.ECODE_INVAL)
14121
      if not isinstance(self.op.disks, list):
14122
        raise errors.OpPrereqError("Invalid parameter 'disks'",
14123
                                   errors.ECODE_INVAL)
14124
      for row in self.op.disks:
14125
        if (not isinstance(row, dict) or
14126
            constants.IDISK_SIZE not in row or
14127
            not isinstance(row[constants.IDISK_SIZE], int) or
14128
            constants.IDISK_MODE not in row or
14129
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14130
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
14131
                                     " parameter", errors.ECODE_INVAL)
14132
      if self.op.hypervisor is None:
14133
        self.op.hypervisor = self.cfg.GetHypervisorType()
14134
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14135
      fname = _ExpandInstanceName(self.cfg, self.op.name)
14136
      self.op.name = fname
14137
      self.relocate_from = \
14138
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14139
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14140
                          constants.IALLOCATOR_MODE_NODE_EVAC):
14141
      if not self.op.instances:
14142
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14143
      self.op.instances = _GetWantedInstances(self, self.op.instances)
14144
    else:
14145
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14146
                                 self.op.mode, errors.ECODE_INVAL)
14147

    
14148
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14149
      if self.op.allocator is None:
14150
        raise errors.OpPrereqError("Missing allocator name",
14151
                                   errors.ECODE_INVAL)
14152
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14153
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
14154
                                 self.op.direction, errors.ECODE_INVAL)
14155

    
14156
  def Exec(self, feedback_fn):
14157
    """Run the allocator test.
14158

14159
    """
14160
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14161
      ial = IAllocator(self.cfg, self.rpc,
14162
                       mode=self.op.mode,
14163
                       name=self.op.name,
14164
                       memory=self.op.memory,
14165
                       disks=self.op.disks,
14166
                       disk_template=self.op.disk_template,
14167
                       os=self.op.os,
14168
                       tags=self.op.tags,
14169
                       nics=self.op.nics,
14170
                       vcpus=self.op.vcpus,
14171
                       hypervisor=self.op.hypervisor,
14172
                       )
14173
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14174
      ial = IAllocator(self.cfg, self.rpc,
14175
                       mode=self.op.mode,
14176
                       name=self.op.name,
14177
                       relocate_from=list(self.relocate_from),
14178
                       )
14179
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14180
      ial = IAllocator(self.cfg, self.rpc,
14181
                       mode=self.op.mode,
14182
                       instances=self.op.instances,
14183
                       target_groups=self.op.target_groups)
14184
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14185
      ial = IAllocator(self.cfg, self.rpc,
14186
                       mode=self.op.mode,
14187
                       instances=self.op.instances,
14188
                       evac_mode=self.op.evac_mode)
14189
    else:
14190
      raise errors.ProgrammerError("Uncatched mode %s in"
14191
                                   " LUTestAllocator.Exec", self.op.mode)
14192

    
14193
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
14194
      result = ial.in_text
14195
    else:
14196
      ial.Run(self.op.allocator, validate=False)
14197
      result = ial.out_text
14198
    return result
14199

    
14200

    
14201
#: Query type implementations
14202
_QUERY_IMPL = {
14203
  constants.QR_INSTANCE: _InstanceQuery,
14204
  constants.QR_NODE: _NodeQuery,
14205
  constants.QR_GROUP: _GroupQuery,
14206
  constants.QR_OS: _OsQuery,
14207
  }
14208

    
14209
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14210

    
14211

    
14212
def _GetQueryImplementation(name):
14213
  """Returns the implemtnation for a query type.
14214

14215
  @param name: Query type, must be one of L{constants.QR_VIA_OP}
14216

14217
  """
14218
  try:
14219
    return _QUERY_IMPL[name]
14220
  except KeyError:
14221
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14222
                               errors.ECODE_INVAL)
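
# Example only (illustrative, not part of the module's API): resolving the
# query implementation for a resource type:
#
#   _GetQueryImplementation(constants.QR_NODE)      # -> _NodeQuery
#   _GetQueryImplementation("no-such-resource")     # raises OpPrereqError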