root / lib / cmdlib.py @ f9d20654

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62
from ganeti import rpc
63

    
64
import ganeti.masterd.instance # pylint: disable=W0611
65

    
66

    
67
#: Size of DRBD meta block device
68
DRBD_META_SIZE = 128
69

    
70

    
71
class ResultWithJobs:
72
  """Data container for LU results with jobs.
73

74
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
75
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
76
  contained in the C{jobs} attribute and include the job IDs in the opcode
77
  result.
78

79
  """
80
  def __init__(self, jobs, **kwargs):
81
    """Initializes this class.
82

83
    Additional return values can be specified as keyword arguments.
84

85
    @type jobs: list of lists of L{opcodes.OpCode}
86
    @param jobs: A list of lists of opcode objects
87

88
    """
89
    self.jobs = jobs
90
    self.other = kwargs
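
# Editor's sketch, not part of the original Ganeti source and never called by
# it: an unused helper showing the calling convention described above. Each
# inner list is one job, each job is a list of opcodes; extra keyword
# arguments are returned verbatim in the opcode result. The opcode and the
# "comment" keyword used here are purely illustrative.
def _ExampleResultWithJobs():
  """Builds a result that submits one job with a single opcode (sketch)."""
  job = [opcodes.OpClusterVerifyConfig(ignore_errors=[])]
  return ResultWithJobs([job], comment="submitted by an example LU")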
91

    
92

    
93
class LogicalUnit(object):
94
  """Logical Unit base class.
95

96
  Subclasses must follow these rules:
97
    - implement ExpandNames
98
    - implement CheckPrereq (except when tasklets are used)
99
    - implement Exec (except when tasklets are used)
100
    - implement BuildHooksEnv
101
    - implement BuildHooksNodes
102
    - redefine HPATH and HTYPE
103
    - optionally redefine their run requirements:
104
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
105

106
  Note that all commands require root permissions.
107

108
  @ivar dry_run_result: the value (if any) that will be returned to the caller
109
      in dry-run mode (signalled by opcode dry_run parameter)
110

111
  """
112
  HPATH = None
113
  HTYPE = None
114
  REQ_BGL = True
115

    
116
  def __init__(self, processor, op, context, rpc_runner):
117
    """Constructor for LogicalUnit.
118

119
    This needs to be overridden in derived classes in order to check op
120
    validity.
121

122
    """
123
    self.proc = processor
124
    self.op = op
125
    self.cfg = context.cfg
126
    self.glm = context.glm
127
    # readability alias
128
    self.owned_locks = context.glm.list_owned
129
    self.context = context
130
    self.rpc = rpc_runner
131
    # Dicts used to declare locking needs to mcpu
132
    self.needed_locks = None
133
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
134
    self.add_locks = {}
135
    self.remove_locks = {}
136
    # Used to force good behavior when calling helper functions
137
    self.recalculate_locks = {}
138
    # logging
139
    self.Log = processor.Log # pylint: disable=C0103
140
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
141
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
142
    self.LogStep = processor.LogStep # pylint: disable=C0103
143
    # support for dry-run
144
    self.dry_run_result = None
145
    # support for generic debug attribute
146
    if (not hasattr(self.op, "debug_level") or
147
        not isinstance(self.op.debug_level, int)):
148
      self.op.debug_level = 0
149

    
150
    # Tasklets
151
    self.tasklets = None
152

    
153
    # Validate opcode parameters and set defaults
154
    self.op.Validate(True)
155

    
156
    self.CheckArguments()
157

    
158
  def CheckArguments(self):
159
    """Check syntactic validity for the opcode arguments.
160

161
    This method is for doing a simple syntactic check and ensure
162
    validity of opcode parameters, without any cluster-related
163
    checks. While the same can be accomplished in ExpandNames and/or
164
    CheckPrereq, doing these separate is better because:
165

166
      - ExpandNames is left as purely a lock-related function
167
      - CheckPrereq is run after we have acquired locks (and possible
168
        waited for them)
169

170
    The function is allowed to change the self.op attribute so that
171
    later methods can no longer worry about missing parameters.
172

173
    """
174
    pass
175

    
176
  def ExpandNames(self):
177
    """Expand names for this LU.
178

179
    This method is called before starting to execute the opcode, and it should
180
    update all the parameters of the opcode to their canonical form (e.g. a
181
    short node name must be fully expanded after this method has successfully
182
    completed). This way locking, hooks, logging, etc. can work correctly.
183

184
    LUs which implement this method must also populate the self.needed_locks
185
    member, as a dict with lock levels as keys, and a list of needed lock names
186
    as values. Rules:
187

188
      - use an empty dict if you don't need any lock
189
      - if you don't need any lock at a particular level omit that level
190
      - don't put anything for the BGL level
191
      - if you want all locks at a level use locking.ALL_SET as a value
192

193
    If you need to share locks (rather than acquire them exclusively) at one
194
    level you can modify self.share_locks, setting a true value (usually 1) for
195
    that level. By default locks are not shared.
196

197
    This function can also define a list of tasklets, which then will be
198
    executed in order instead of the usual LU-level CheckPrereq and Exec
199
    functions, if those are not defined by the LU.
200

201
    Examples::
202

203
      # Acquire all nodes and one instance
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: locking.ALL_SET,
206
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
207
      }
208
      # Acquire just two nodes
209
      self.needed_locks = {
210
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
211
      }
212
      # Acquire no locks
213
      self.needed_locks = {} # No, you can't leave it to the default value None
214

215
    """
216
    # The implementation of this method is mandatory only if the new LU is
217
    # concurrent, so that old LUs don't need to be changed all at the same
218
    # time.
219
    if self.REQ_BGL:
220
      self.needed_locks = {} # Exclusive LUs don't need locks.
221
    else:
222
      raise NotImplementedError
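
  # Editor's note (illustrative addition, not in the original source): a
  # concurrent LU's override typically fills self.needed_locks and, when
  # shared access is enough, self.share_locks, for example:
  #
  #   def ExpandNames(self):
  #     self.needed_locks = {
  #       locking.LEVEL_INSTANCE: ["instance1.example.com"],
  #       locking.LEVEL_NODE: locking.ALL_SET,
  #       }
  #     # share the node locks, keep the instance lock exclusive (default)
  #     self.share_locks[locking.LEVEL_NODE] = 1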
223

    
224
  def DeclareLocks(self, level):
225
    """Declare LU locking needs for a level
226

227
    While most LUs can just declare their locking needs at ExpandNames time,
228
    sometimes there's the need to calculate some locks after having acquired
229
    the ones before. This function is called just before acquiring locks at a
230
    particular level, but after acquiring the ones at lower levels, and permits
231
    such calculations. It can be used to modify self.needed_locks, and by
232
    default it does nothing.
233

234
    This function is only called if you have something already set in
235
    self.needed_locks for the level.
236

237
    @param level: Locking level which is going to be locked
238
    @type level: member of ganeti.locking.LEVELS
239

240
    """
241

    
242
  def CheckPrereq(self):
243
    """Check prerequisites for this LU.
244

245
    This method should check that the prerequisites for the execution
246
    of this LU are fulfilled. It can do internode communication, but
247
    it should be idempotent - no cluster or system changes are
248
    allowed.
249

250
    The method should raise errors.OpPrereqError in case something is
251
    not fulfilled. Its return value is ignored.
252

253
    This method should also update all the parameters of the opcode to
254
    their canonical form if it hasn't been done by ExpandNames before.
255

256
    """
257
    if self.tasklets is not None:
258
      for (idx, tl) in enumerate(self.tasklets):
259
        logging.debug("Checking prerequisites for tasklet %s/%s",
260
                      idx + 1, len(self.tasklets))
261
        tl.CheckPrereq()
262
    else:
263
      pass
264

    
265
  def Exec(self, feedback_fn):
266
    """Execute the LU.
267

268
    This method should implement the actual work. It should raise
269
    errors.OpExecError for failures that are somewhat dealt with in
270
    code, or expected.
271

272
    """
273
    if self.tasklets is not None:
274
      for (idx, tl) in enumerate(self.tasklets):
275
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
276
        tl.Exec(feedback_fn)
277
    else:
278
      raise NotImplementedError
279

    
280
  def BuildHooksEnv(self):
281
    """Build hooks environment for this LU.
282

283
    @rtype: dict
284
    @return: Dictionary containing the environment that will be used for
285
      running the hooks for this LU. The keys of the dict must not be prefixed
286
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
287
      will extend the environment with additional variables. If no environment
288
      should be defined, an empty dictionary should be returned (not C{None}).
289
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
290
      will not be called.
291

292
    """
293
    raise NotImplementedError
294

    
295
  def BuildHooksNodes(self):
296
    """Build list of nodes to run LU's hooks.
297

298
    @rtype: tuple; (list, list)
299
    @return: Tuple containing a list of node names on which the hook
300
      should run before the execution and a list of node names on which the
301
      hook should run after the execution. No nodes should be returned as an
302
      empty list (and not None).
303
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
304
      will not be called.
305

306
    """
307
    raise NotImplementedError
308

    
309
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
310
    """Notify the LU about the results of its hooks.
311

312
    This method is called every time a hooks phase is executed, and notifies
313
    the Logical Unit about the hooks' result. The LU can then use it to alter
314
    its result based on the hooks.  By default the method does nothing and the
315
    previous result is passed back unchanged, but any LU can define it if it
316
    wants to use the local cluster hook-scripts somehow.
317

318
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
319
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
320
    @param hook_results: the results of the multi-node hooks rpc call
321
    @param feedback_fn: function used to send feedback back to the caller
322
    @param lu_result: the previous Exec result this LU had, or None
323
        in the PRE phase
324
    @return: the new Exec result, based on the previous result
325
        and hook results
326

327
    """
328
    # API must be kept, thus we ignore the unused argument and the
329
    # "could be a function" warnings
330
    # pylint: disable=W0613,R0201
331
    return lu_result
332

    
333
  def _ExpandAndLockInstance(self):
334
    """Helper function to expand and lock an instance.
335

336
    Many LUs that work on an instance take its name in self.op.instance_name
337
    and need to expand it and then declare the expanded name for locking. This
338
    function does it, and then updates self.op.instance_name to the expanded
339
    name. It also initializes needed_locks as a dict, if this hasn't been done
340
    before.
341

342
    """
343
    if self.needed_locks is None:
344
      self.needed_locks = {}
345
    else:
346
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
347
        "_ExpandAndLockInstance called with instance-level locks set"
348
    self.op.instance_name = _ExpandInstanceName(self.cfg,
349
                                                self.op.instance_name)
350
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
351

    
352
  def _LockInstancesNodes(self, primary_only=False):
353
    """Helper function to declare instances' nodes for locking.
354

355
    This function should be called after locking one or more instances to lock
356
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
357
    with all primary or secondary nodes for instances already locked and
358
    present in self.needed_locks[locking.LEVEL_INSTANCE].
359

360
    It should be called from DeclareLocks, and for safety only works if
361
    self.recalculate_locks[locking.LEVEL_NODE] is set.
362

363
    In the future it may grow parameters to just lock some instance's nodes, or
364
    to just lock primaries or secondary nodes, if needed.
365

366
    It should be called in DeclareLocks in a way similar to::
367

368
      if level == locking.LEVEL_NODE:
369
        self._LockInstancesNodes()
370

371
    @type primary_only: boolean
372
    @param primary_only: only lock primary nodes of locked instances
373

374
    """
375
    assert locking.LEVEL_NODE in self.recalculate_locks, \
376
      "_LockInstancesNodes helper function called with no nodes to recalculate"
377

    
378
    # TODO: check if we've really been called with the instance locks held
379

    
380
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
381
    # future we might want to have different behaviors depending on the value
382
    # of self.recalculate_locks[locking.LEVEL_NODE]
383
    wanted_nodes = []
384
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
385
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
386
      wanted_nodes.append(instance.primary_node)
387
      if not primary_only:
388
        wanted_nodes.extend(instance.secondary_nodes)
389

    
390
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
391
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
392
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
393
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
394

    
395
    del self.recalculate_locks[locking.LEVEL_NODE]
396

    
397

    
398
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
399
  """Simple LU which runs no hooks.
400

401
  This LU is intended as a parent for other LogicalUnits which will
402
  run no hooks, in order to reduce duplicate code.
403

404
  """
405
  HPATH = None
406
  HTYPE = None
407

    
408
  def BuildHooksEnv(self):
409
    """Empty BuildHooksEnv for NoHooksLu.
410

411
    This just raises an error.
412

413
    """
414
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
415

    
416
  def BuildHooksNodes(self):
417
    """Empty BuildHooksNodes for NoHooksLU.
418

419
    """
420
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
421

    
422

    
423
class Tasklet:
424
  """Tasklet base class.
425

426
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
427
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
428
  tasklets know nothing about locks.
429

430
  Subclasses must follow these rules:
431
    - Implement CheckPrereq
432
    - Implement Exec
433

434
  """
435
  def __init__(self, lu):
436
    self.lu = lu
437

    
438
    # Shortcuts
439
    self.cfg = lu.cfg
440
    self.rpc = lu.rpc
441

    
442
  def CheckPrereq(self):
443
    """Check prerequisites for this tasklets.
444

445
    This method should check whether the prerequisites for the execution of
446
    this tasklet are fulfilled. It can do internode communication, but it
447
    should be idempotent - no cluster or system changes are allowed.
448

449
    The method should raise errors.OpPrereqError in case something is not
450
    fulfilled. Its return value is ignored.
451

452
    This method should also update all parameters to their canonical form if it
453
    hasn't been done before.
454

455
    """
456
    pass
457

    
458
  def Exec(self, feedback_fn):
459
    """Execute the tasklet.
460

461
    This method should implement the actual work. It should raise
462
    errors.OpExecError for failures that are somewhat dealt with in code, or
463
    expected.
464

465
    """
466
    raise NotImplementedError
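
# Editor's sketch, not part of the original Ganeti source and never used by
# it: a minimal tasklet honouring the contract above. An LU would typically
# create such tasklets in ExpandNames (self.tasklets = [...]) and let the
# default LogicalUnit.CheckPrereq/Exec run them in order.
class _ExampleNoopTasklet(Tasklet):
  """Tasklet with no prerequisites that performs no work (sketch)."""
  def CheckPrereq(self):
    # a real tasklet would raise errors.OpPrereqError on failed checks
    pass

  def Exec(self, feedback_fn):
    feedback_fn("Example tasklet executed, nothing done")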
467

    
468

    
469
class _QueryBase:
470
  """Base for query utility classes.
471

472
  """
473
  #: Attribute holding field definitions
474
  FIELDS = None
475

    
476
  def __init__(self, qfilter, fields, use_locking):
477
    """Initializes this class.
478

479
    """
480
    self.use_locking = use_locking
481

    
482
    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
483
                             namefield="name")
484
    self.requested_data = self.query.RequestedData()
485
    self.names = self.query.RequestedNames()
486

    
487
    # Sort only if no names were requested
488
    self.sort_by_name = not self.names
489

    
490
    self.do_locking = None
491
    self.wanted = None
492

    
493
  def _GetNames(self, lu, all_names, lock_level):
494
    """Helper function to determine names asked for in the query.
495

496
    """
497
    if self.do_locking:
498
      names = lu.owned_locks(lock_level)
499
    else:
500
      names = all_names
501

    
502
    if self.wanted == locking.ALL_SET:
503
      assert not self.names
504
      # caller didn't specify names, so ordering is not important
505
      return utils.NiceSort(names)
506

    
507
    # caller specified names and we must keep the same order
508
    assert self.names
509
    assert not self.do_locking or lu.glm.is_owned(lock_level)
510

    
511
    missing = set(self.wanted).difference(names)
512
    if missing:
513
      raise errors.OpExecError("Some items were removed before retrieving"
514
                               " their data: %s" % missing)
515

    
516
    # Return expanded names
517
    return self.wanted
518

    
519
  def ExpandNames(self, lu):
520
    """Expand names for this query.
521

522
    See L{LogicalUnit.ExpandNames}.
523

524
    """
525
    raise NotImplementedError()
526

    
527
  def DeclareLocks(self, lu, level):
528
    """Declare locks for this query.
529

530
    See L{LogicalUnit.DeclareLocks}.
531

532
    """
533
    raise NotImplementedError()
534

    
535
  def _GetQueryData(self, lu):
536
    """Collects all data for this query.
537

538
    @return: Query data object
539

540
    """
541
    raise NotImplementedError()
542

    
543
  def NewStyleQuery(self, lu):
544
    """Collect data and execute query.
545

546
    """
547
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
548
                                  sort_by_name=self.sort_by_name)
549

    
550
  def OldStyleQuery(self, lu):
551
    """Collect data and execute query.
552

553
    """
554
    return self.query.OldStyleQuery(self._GetQueryData(lu),
555
                                    sort_by_name=self.sort_by_name)
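
# Editor's note (illustrative, not part of the original file): a concrete
# query class derives from _QueryBase, points FIELDS at a field-definition
# set from the query module and implements the three abstract methods; all
# names below are hypothetical:
#
#   class _ExampleQuery(_QueryBase):
#     FIELDS = query.EXAMPLE_FIELDS  # hypothetical field set
#
#     def ExpandNames(self, lu):
#       lu.needed_locks = {}
#
#     def DeclareLocks(self, lu, level):
#       pass
#
#     def _GetQueryData(self, lu):
#       # collect and return the data in the form expected by self.query
#       ...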
556

    
557

    
558
def _ShareAll():
559
  """Returns a dict declaring all lock levels shared.
560

561
  """
562
  return dict.fromkeys(locking.LEVELS, 1)
563

    
564

    
565
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
566
  """Checks if the owned node groups are still correct for an instance.
567

568
  @type cfg: L{config.ConfigWriter}
569
  @param cfg: The cluster configuration
570
  @type instance_name: string
571
  @param instance_name: Instance name
572
  @type owned_groups: set or frozenset
573
  @param owned_groups: List of currently owned node groups
574

575
  """
576
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
577

    
578
  if not owned_groups.issuperset(inst_groups):
579
    raise errors.OpPrereqError("Instance %s's node groups changed since"
580
                               " locks were acquired, current groups are"
581
                               " are '%s', owning groups '%s'; retry the"
582
                               " operation" %
583
                               (instance_name,
584
                                utils.CommaJoin(inst_groups),
585
                                utils.CommaJoin(owned_groups)),
586
                               errors.ECODE_STATE)
587

    
588
  return inst_groups
589

    
590

    
591
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
592
  """Checks if the instances in a node group are still correct.
593

594
  @type cfg: L{config.ConfigWriter}
595
  @param cfg: The cluster configuration
596
  @type group_uuid: string
597
  @param group_uuid: Node group UUID
598
  @type owned_instances: set or frozenset
599
  @param owned_instances: List of currently owned instances
600

601
  """
602
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
603
  if owned_instances != wanted_instances:
604
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
605
                               " locks were acquired, wanted '%s', have '%s';"
606
                               " retry the operation" %
607
                               (group_uuid,
608
                                utils.CommaJoin(wanted_instances),
609
                                utils.CommaJoin(owned_instances)),
610
                               errors.ECODE_STATE)
611

    
612
  return wanted_instances
613

    
614

    
615
def _SupportsOob(cfg, node):
616
  """Tells if node supports OOB.
617

618
  @type cfg: L{config.ConfigWriter}
619
  @param cfg: The cluster configuration
620
  @type node: L{objects.Node}
621
  @param node: The node
622
  @return: The OOB script if supported or an empty string otherwise
623

624
  """
625
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
626

    
627

    
628
def _GetWantedNodes(lu, nodes):
629
  """Returns list of checked and expanded node names.
630

631
  @type lu: L{LogicalUnit}
632
  @param lu: the logical unit on whose behalf we execute
633
  @type nodes: list
634
  @param nodes: list of node names or None for all nodes
635
  @rtype: list
636
  @return: the list of nodes, sorted
637
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
638

639
  """
640
  if nodes:
641
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
642

    
643
  return utils.NiceSort(lu.cfg.GetNodeList())
644

    
645

    
646
def _GetWantedInstances(lu, instances):
647
  """Returns list of checked and expanded instance names.
648

649
  @type lu: L{LogicalUnit}
650
  @param lu: the logical unit on whose behalf we execute
651
  @type instances: list
652
  @param instances: list of instance names or None for all instances
653
  @rtype: list
654
  @return: the list of instances, sorted
655
  @raise errors.OpPrereqError: if the instances parameter is wrong type
656
  @raise errors.OpPrereqError: if any of the passed instances is not found
657

658
  """
659
  if instances:
660
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
661
  else:
662
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
663
  return wanted
664

    
665

    
666
def _GetUpdatedParams(old_params, update_dict,
667
                      use_default=True, use_none=False):
668
  """Return the new version of a parameter dictionary.
669

670
  @type old_params: dict
671
  @param old_params: old parameters
672
  @type update_dict: dict
673
  @param update_dict: dict containing new parameter values, or
674
      constants.VALUE_DEFAULT to reset the parameter to its default
675
      value
676
  @type use_default: boolean
677
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
678
      values as 'to be deleted' values
679
  @type use_none: boolean
680
  @param use_none: whether to recognise C{None} values as 'to be
681
      deleted' values
682
  @rtype: dict
683
  @return: the new parameter dictionary
684

685
  """
686
  params_copy = copy.deepcopy(old_params)
687
  for key, val in update_dict.iteritems():
688
    if ((use_default and val == constants.VALUE_DEFAULT) or
689
        (use_none and val is None)):
690
      try:
691
        del params_copy[key]
692
      except KeyError:
693
        pass
694
    else:
695
      params_copy[key] = val
696
  return params_copy
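
# Editor's illustration (unused sketch, not part of the original source) of
# the semantics documented above; the parameter names are examples only.
def _ExampleGetUpdatedParams():
  """Shows how L{constants.VALUE_DEFAULT} removes a key (sketch)."""
  old = {"kernel_path": "/vmlinuz", "root_path": "/dev/sda1"}
  update = {"kernel_path": constants.VALUE_DEFAULT, "serial_console": True}
  # result: {"root_path": "/dev/sda1", "serial_console": True}; the reset key
  # falls back to the cluster-wide default, the other keys are set or added
  return _GetUpdatedParams(old, update)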
697

    
698

    
699
def _ReleaseLocks(lu, level, names=None, keep=None):
700
  """Releases locks owned by an LU.
701

702
  @type lu: L{LogicalUnit}
703
  @param level: Lock level
704
  @type names: list or None
705
  @param names: Names of locks to release
706
  @type keep: list or None
707
  @param keep: Names of locks to retain
708

709
  """
710
  assert not (keep is not None and names is not None), \
711
         "Only one of the 'names' and the 'keep' parameters can be given"
712

    
713
  if names is not None:
714
    should_release = names.__contains__
715
  elif keep:
716
    should_release = lambda name: name not in keep
717
  else:
718
    should_release = None
719

    
720
  if should_release:
721
    retain = []
722
    release = []
723

    
724
    # Determine which locks to release
725
    for name in lu.owned_locks(level):
726
      if should_release(name):
727
        release.append(name)
728
      else:
729
        retain.append(name)
730

    
731
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
732

    
733
    # Release just some locks
734
    lu.glm.release(level, names=release)
735

    
736
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
737
  else:
738
    # Release everything
739
    lu.glm.release(level)
740

    
741
    assert not lu.glm.is_owned(level), "No locks should be owned"
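
# Editor's note (not part of the original code): typical calls either retain
# a subset or release an explicit list, never both; the variable names in
# these two (hypothetical) examples are made up:
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=still_needed_nodes)
#   _ReleaseLocks(self, locking.LEVEL_INSTANCE, names=unneeded_instances)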
742

    
743

    
744
def _MapInstanceDisksToNodes(instances):
745
  """Creates a map from (node, volume) to instance name.
746

747
  @type instances: list of L{objects.Instance}
748
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
749

750
  """
751
  return dict(((node, vol), inst.name)
752
              for inst in instances
753
              for (node, vols) in inst.MapLVsByNode().items()
754
              for vol in vols)
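
# Editor's illustration (not in the original source): the resulting mapping
# has the shape {("node1.example.com", "xenvg/disk0"): "inst1.example.com"},
# one entry per (node, logical volume) pair of every instance; the names
# shown are made up.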
755

    
756

    
757
def _RunPostHook(lu, node_name):
758
  """Runs the post-hook for an opcode on a single node.
759

760
  """
761
  hm = lu.proc.BuildHooksManager(lu)
762
  try:
763
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
764
  except:
765
    # pylint: disable=W0702
766
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
767

    
768

    
769
def _CheckOutputFields(static, dynamic, selected):
770
  """Checks whether all selected fields are valid.
771

772
  @type static: L{utils.FieldSet}
773
  @param static: static fields set
774
  @type dynamic: L{utils.FieldSet}
775
  @param dynamic: dynamic fields set
776

777
  """
778
  f = utils.FieldSet()
779
  f.Extend(static)
780
  f.Extend(dynamic)
781

    
782
  delta = f.NonMatching(selected)
783
  if delta:
784
    raise errors.OpPrereqError("Unknown output fields selected: %s"
785
                               % ",".join(delta), errors.ECODE_INVAL)
786

    
787

    
788
def _CheckGlobalHvParams(params):
789
  """Validates that given hypervisor params are not global ones.
790

791
  This will ensure that instances don't get customised versions of
792
  global params.
793

794
  """
795
  used_globals = constants.HVC_GLOBALS.intersection(params)
796
  if used_globals:
797
    msg = ("The following hypervisor parameters are global and cannot"
798
           " be customized at instance level, please modify them at"
799
           " cluster level: %s" % utils.CommaJoin(used_globals))
800
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
801

    
802

    
803
def _CheckNodeOnline(lu, node, msg=None):
804
  """Ensure that a given node is online.
805

806
  @param lu: the LU on behalf of which we make the check
807
  @param node: the node to check
808
  @param msg: if passed, should be a message to replace the default one
809
  @raise errors.OpPrereqError: if the node is offline
810

811
  """
812
  if msg is None:
813
    msg = "Can't use offline node"
814
  if lu.cfg.GetNodeInfo(node).offline:
815
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
816

    
817

    
818
def _CheckNodeNotDrained(lu, node):
819
  """Ensure that a given node is not drained.
820

821
  @param lu: the LU on behalf of which we make the check
822
  @param node: the node to check
823
  @raise errors.OpPrereqError: if the node is drained
824

825
  """
826
  if lu.cfg.GetNodeInfo(node).drained:
827
    raise errors.OpPrereqError("Can't use drained node %s" % node,
828
                               errors.ECODE_STATE)
829

    
830

    
831
def _CheckNodeVmCapable(lu, node):
832
  """Ensure that a given node is vm capable.
833

834
  @param lu: the LU on behalf of which we make the check
835
  @param node: the node to check
836
  @raise errors.OpPrereqError: if the node is not vm capable
837

838
  """
839
  if not lu.cfg.GetNodeInfo(node).vm_capable:
840
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
841
                               errors.ECODE_STATE)
842

    
843

    
844
def _CheckNodeHasOS(lu, node, os_name, force_variant):
845
  """Ensure that a node supports a given OS.
846

847
  @param lu: the LU on behalf of which we make the check
848
  @param node: the node to check
849
  @param os_name: the OS to query about
850
  @param force_variant: whether to ignore variant errors
851
  @raise errors.OpPrereqError: if the node is not supporting the OS
852

853
  """
854
  result = lu.rpc.call_os_get(node, os_name)
855
  result.Raise("OS '%s' not in supported OS list for node %s" %
856
               (os_name, node),
857
               prereq=True, ecode=errors.ECODE_INVAL)
858
  if not force_variant:
859
    _CheckOSVariant(result.payload, os_name)
860

    
861

    
862
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
863
  """Ensure that a node has the given secondary ip.
864

865
  @type lu: L{LogicalUnit}
866
  @param lu: the LU on behalf of which we make the check
867
  @type node: string
868
  @param node: the node to check
869
  @type secondary_ip: string
870
  @param secondary_ip: the ip to check
871
  @type prereq: boolean
872
  @param prereq: whether to throw a prerequisite or an execute error
873
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
874
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
875

876
  """
877
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
878
  result.Raise("Failure checking secondary ip on node %s" % node,
879
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
880
  if not result.payload:
881
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
882
           " please fix and re-run this command" % secondary_ip)
883
    if prereq:
884
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
885
    else:
886
      raise errors.OpExecError(msg)
887

    
888

    
889
def _GetClusterDomainSecret():
890
  """Reads the cluster domain secret.
891

892
  """
893
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
894
                               strict=True)
895

    
896

    
897
def _CheckInstanceDown(lu, instance, reason):
898
  """Ensure that an instance is not running."""
899
  if instance.admin_up:
900
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
901
                               (instance.name, reason), errors.ECODE_STATE)
902

    
903
  pnode = instance.primary_node
904
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
905
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
906
              prereq=True, ecode=errors.ECODE_ENVIRON)
907

    
908
  if instance.name in ins_l.payload:
909
    raise errors.OpPrereqError("Instance %s is running, %s" %
910
                               (instance.name, reason), errors.ECODE_STATE)
911

    
912

    
913
def _ExpandItemName(fn, name, kind):
914
  """Expand an item name.
915

916
  @param fn: the function to use for expansion
917
  @param name: requested item name
918
  @param kind: text description ('Node' or 'Instance')
919
  @return: the resolved (full) name
920
  @raise errors.OpPrereqError: if the item is not found
921

922
  """
923
  full_name = fn(name)
924
  if full_name is None:
925
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
926
                               errors.ECODE_NOENT)
927
  return full_name
928

    
929

    
930
def _ExpandNodeName(cfg, name):
931
  """Wrapper over L{_ExpandItemName} for nodes."""
932
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
933

    
934

    
935
def _ExpandInstanceName(cfg, name):
936
  """Wrapper over L{_ExpandItemName} for instance."""
937
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
938

    
939

    
940
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
941
                          memory, vcpus, nics, disk_template, disks,
942
                          bep, hvp, hypervisor_name, tags):
943
  """Builds instance related env variables for hooks
944

945
  This builds the hook environment from individual variables.
946

947
  @type name: string
948
  @param name: the name of the instance
949
  @type primary_node: string
950
  @param primary_node: the name of the instance's primary node
951
  @type secondary_nodes: list
952
  @param secondary_nodes: list of secondary nodes as strings
953
  @type os_type: string
954
  @param os_type: the name of the instance's OS
955
  @type status: boolean
956
  @param status: the should_run status of the instance
957
  @type memory: string
958
  @param memory: the memory size of the instance
959
  @type vcpus: string
960
  @param vcpus: the count of VCPUs the instance has
961
  @type nics: list
962
  @param nics: list of tuples (ip, mac, mode, link) representing
963
      the NICs the instance has
964
  @type disk_template: string
965
  @param disk_template: the disk template of the instance
966
  @type disks: list
967
  @param disks: the list of (size, mode) pairs
968
  @type bep: dict
969
  @param bep: the backend parameters for the instance
970
  @type hvp: dict
971
  @param hvp: the hypervisor parameters for the instance
972
  @type hypervisor_name: string
973
  @param hypervisor_name: the hypervisor for the instance
974
  @type tags: list
975
  @param tags: list of instance tags as strings
976
  @rtype: dict
977
  @return: the hook environment for this instance
978

979
  """
980
  if status:
981
    str_status = "up"
982
  else:
983
    str_status = "down"
984
  env = {
985
    "OP_TARGET": name,
986
    "INSTANCE_NAME": name,
987
    "INSTANCE_PRIMARY": primary_node,
988
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
989
    "INSTANCE_OS_TYPE": os_type,
990
    "INSTANCE_STATUS": str_status,
991
    "INSTANCE_MEMORY": memory,
992
    "INSTANCE_VCPUS": vcpus,
993
    "INSTANCE_DISK_TEMPLATE": disk_template,
994
    "INSTANCE_HYPERVISOR": hypervisor_name,
995
  }
996

    
997
  if nics:
998
    nic_count = len(nics)
999
    for idx, (ip, mac, mode, link) in enumerate(nics):
1000
      if ip is None:
1001
        ip = ""
1002
      env["INSTANCE_NIC%d_IP" % idx] = ip
1003
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1004
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1005
      env["INSTANCE_NIC%d_LINK" % idx] = link
1006
      if mode == constants.NIC_MODE_BRIDGED:
1007
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1008
  else:
1009
    nic_count = 0
1010

    
1011
  env["INSTANCE_NIC_COUNT"] = nic_count
1012

    
1013
  if disks:
1014
    disk_count = len(disks)
1015
    for idx, (size, mode) in enumerate(disks):
1016
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1017
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1018
  else:
1019
    disk_count = 0
1020

    
1021
  env["INSTANCE_DISK_COUNT"] = disk_count
1022

    
1023
  if not tags:
1024
    tags = []
1025

    
1026
  env["INSTANCE_TAGS"] = " ".join(tags)
1027

    
1028
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1029
    for key, value in source.items():
1030
      env["INSTANCE_%s_%s" % (kind, key)] = value
1031

    
1032
  return env
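
# Editor's note (illustrative, not part of the original file): for an
# instance with one NIC and one disk the function above produces, among
# others, the keys OP_TARGET, INSTANCE_NAME, INSTANCE_PRIMARY,
# INSTANCE_SECONDARIES, INSTANCE_OS_TYPE, INSTANCE_STATUS, INSTANCE_MEMORY,
# INSTANCE_VCPUS, INSTANCE_DISK_TEMPLATE, INSTANCE_HYPERVISOR,
# INSTANCE_NIC_COUNT, INSTANCE_NIC0_IP/_MAC/_MODE/_LINK (plus _BRIDGE for
# bridged NICs), INSTANCE_DISK_COUNT, INSTANCE_DISK0_SIZE/_MODE,
# INSTANCE_TAGS and one INSTANCE_BE_*/INSTANCE_HV_* entry per parameter.
# The hooks runner later prefixes each key with "GANETI_".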
1033

    
1034

    
1035
def _NICListToTuple(lu, nics):
1036
  """Build a list of nic information tuples.
1037

1038
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1039
  value in LUInstanceQueryData.
1040

1041
  @type lu:  L{LogicalUnit}
1042
  @param lu: the logical unit on whose behalf we execute
1043
  @type nics: list of L{objects.NIC}
1044
  @param nics: list of nics to convert to hooks tuples
1045

1046
  """
1047
  hooks_nics = []
1048
  cluster = lu.cfg.GetClusterInfo()
1049
  for nic in nics:
1050
    ip = nic.ip
1051
    mac = nic.mac
1052
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1053
    mode = filled_params[constants.NIC_MODE]
1054
    link = filled_params[constants.NIC_LINK]
1055
    hooks_nics.append((ip, mac, mode, link))
1056
  return hooks_nics
1057

    
1058

    
1059
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1060
  """Builds instance related env variables for hooks from an object.
1061

1062
  @type lu: L{LogicalUnit}
1063
  @param lu: the logical unit on whose behalf we execute
1064
  @type instance: L{objects.Instance}
1065
  @param instance: the instance for which we should build the
1066
      environment
1067
  @type override: dict
1068
  @param override: dictionary with key/values that will override
1069
      our values
1070
  @rtype: dict
1071
  @return: the hook environment dictionary
1072

1073
  """
1074
  cluster = lu.cfg.GetClusterInfo()
1075
  bep = cluster.FillBE(instance)
1076
  hvp = cluster.FillHV(instance)
1077
  args = {
1078
    "name": instance.name,
1079
    "primary_node": instance.primary_node,
1080
    "secondary_nodes": instance.secondary_nodes,
1081
    "os_type": instance.os,
1082
    "status": instance.admin_up,
1083
    "memory": bep[constants.BE_MEMORY],
1084
    "vcpus": bep[constants.BE_VCPUS],
1085
    "nics": _NICListToTuple(lu, instance.nics),
1086
    "disk_template": instance.disk_template,
1087
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1088
    "bep": bep,
1089
    "hvp": hvp,
1090
    "hypervisor_name": instance.hypervisor,
1091
    "tags": instance.tags,
1092
  }
1093
  if override:
1094
    args.update(override)
1095
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1096

    
1097

    
1098
def _AdjustCandidatePool(lu, exceptions):
1099
  """Adjust the candidate pool after node operations.
1100

1101
  """
1102
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1103
  if mod_list:
1104
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1105
               utils.CommaJoin(node.name for node in mod_list))
1106
    for name in mod_list:
1107
      lu.context.ReaddNode(name)
1108
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1109
  if mc_now > mc_max:
1110
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1111
               (mc_now, mc_max))
1112

    
1113

    
1114
def _DecideSelfPromotion(lu, exceptions=None):
1115
  """Decide whether I should promote myself as a master candidate.
1116

1117
  """
1118
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1119
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1120
  # the new node will increase mc_max with one, so:
1121
  mc_should = min(mc_should + 1, cp_size)
1122
  return mc_now < mc_should
1123

    
1124

    
1125
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1126
  """Check that the brigdes needed by a list of nics exist.
1127

1128
  """
1129
  cluster = lu.cfg.GetClusterInfo()
1130
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1131
  brlist = [params[constants.NIC_LINK] for params in paramslist
1132
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1133
  if brlist:
1134
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1135
    result.Raise("Error checking bridges on destination node '%s'" %
1136
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1137

    
1138

    
1139
def _CheckInstanceBridgesExist(lu, instance, node=None):
1140
  """Check that the brigdes needed by an instance exist.
1141

1142
  """
1143
  if node is None:
1144
    node = instance.primary_node
1145
  _CheckNicsBridgesExist(lu, instance.nics, node)
1146

    
1147

    
1148
def _CheckOSVariant(os_obj, name):
1149
  """Check whether an OS name conforms to the os variants specification.
1150

1151
  @type os_obj: L{objects.OS}
1152
  @param os_obj: OS object to check
1153
  @type name: string
1154
  @param name: OS name passed by the user, to check for validity
1155

1156
  """
1157
  variant = objects.OS.GetVariant(name)
1158
  if not os_obj.supported_variants:
1159
    if variant:
1160
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1161
                                 " passed)" % (os_obj.name, variant),
1162
                                 errors.ECODE_INVAL)
1163
    return
1164
  if not variant:
1165
    raise errors.OpPrereqError("OS name must include a variant",
1166
                               errors.ECODE_INVAL)
1167

    
1168
  if variant not in os_obj.supported_variants:
1169
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1170

    
1171

    
1172
def _GetNodeInstancesInner(cfg, fn):
1173
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1174

    
1175

    
1176
def _GetNodeInstances(cfg, node_name):
1177
  """Returns a list of all primary and secondary instances on a node.
1178

1179
  """
1180

    
1181
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1182

    
1183

    
1184
def _GetNodePrimaryInstances(cfg, node_name):
1185
  """Returns primary instances on a node.
1186

1187
  """
1188
  return _GetNodeInstancesInner(cfg,
1189
                                lambda inst: node_name == inst.primary_node)
1190

    
1191

    
1192
def _GetNodeSecondaryInstances(cfg, node_name):
1193
  """Returns secondary instances on a node.
1194

1195
  """
1196
  return _GetNodeInstancesInner(cfg,
1197
                                lambda inst: node_name in inst.secondary_nodes)
1198

    
1199

    
1200
def _GetStorageTypeArgs(cfg, storage_type):
1201
  """Returns the arguments for a storage type.
1202

1203
  """
1204
  # Special case for file storage
1205
  if storage_type == constants.ST_FILE:
1206
    # storage.FileStorage wants a list of storage directories
1207
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1208

    
1209
  return []
1210

    
1211

    
1212
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1213
  faulty = []
1214

    
1215
  for dev in instance.disks:
1216
    cfg.SetDiskID(dev, node_name)
1217

    
1218
  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1219
  result.Raise("Failed to get disk status from node %s" % node_name,
1220
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1221

    
1222
  for idx, bdev_status in enumerate(result.payload):
1223
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1224
      faulty.append(idx)
1225

    
1226
  return faulty
1227

    
1228

    
1229
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1230
  """Check the sanity of iallocator and node arguments and use the
1231
  cluster-wide iallocator if appropriate.
1232

1233
  Check that at most one of (iallocator, node) is specified. If none is
1234
  specified, then the LU's opcode's iallocator slot is filled with the
1235
  cluster-wide default iallocator.
1236

1237
  @type iallocator_slot: string
1238
  @param iallocator_slot: the name of the opcode iallocator slot
1239
  @type node_slot: string
1240
  @param node_slot: the name of the opcode target node slot
1241

1242
  """
1243
  node = getattr(lu.op, node_slot, None)
1244
  iallocator = getattr(lu.op, iallocator_slot, None)
1245

    
1246
  if node is not None and iallocator is not None:
1247
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1248
                               errors.ECODE_INVAL)
1249
  elif node is None and iallocator is None:
1250
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1251
    if default_iallocator:
1252
      setattr(lu.op, iallocator_slot, default_iallocator)
1253
    else:
1254
      raise errors.OpPrereqError("No iallocator or node given and no"
1255
                                 " cluster-wide default iallocator found;"
1256
                                 " please specify either an iallocator or a"
1257
                                 " node, or set a cluster-wide default"
1258
                                 " iallocator")
1259

    
1260

    
1261
def _GetDefaultIAllocator(cfg, iallocator):
1262
  """Decides on which iallocator to use.
1263

1264
  @type cfg: L{config.ConfigWriter}
1265
  @param cfg: Cluster configuration object
1266
  @type iallocator: string or None
1267
  @param iallocator: Iallocator specified in opcode
1268
  @rtype: string
1269
  @return: Iallocator name
1270

1271
  """
1272
  if not iallocator:
1273
    # Use default iallocator
1274
    iallocator = cfg.GetDefaultIAllocator()
1275

    
1276
  if not iallocator:
1277
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1278
                               " opcode nor as a cluster-wide default",
1279
                               errors.ECODE_INVAL)
1280

    
1281
  return iallocator
1282

    
1283

    
1284
class LUClusterPostInit(LogicalUnit):
1285
  """Logical unit for running hooks after cluster initialization.
1286

1287
  """
1288
  HPATH = "cluster-init"
1289
  HTYPE = constants.HTYPE_CLUSTER
1290

    
1291
  def BuildHooksEnv(self):
1292
    """Build hooks env.
1293

1294
    """
1295
    return {
1296
      "OP_TARGET": self.cfg.GetClusterName(),
1297
      }
1298

    
1299
  def BuildHooksNodes(self):
1300
    """Build hooks nodes.
1301

1302
    """
1303
    return ([], [self.cfg.GetMasterNode()])
1304

    
1305
  def Exec(self, feedback_fn):
1306
    """Nothing to do.
1307

1308
    """
1309
    return True
1310

    
1311

    
1312
class LUClusterDestroy(LogicalUnit):
1313
  """Logical unit for destroying the cluster.
1314

1315
  """
1316
  HPATH = "cluster-destroy"
1317
  HTYPE = constants.HTYPE_CLUSTER
1318

    
1319
  def BuildHooksEnv(self):
1320
    """Build hooks env.
1321

1322
    """
1323
    return {
1324
      "OP_TARGET": self.cfg.GetClusterName(),
1325
      }
1326

    
1327
  def BuildHooksNodes(self):
1328
    """Build hooks nodes.
1329

1330
    """
1331
    return ([], [])
1332

    
1333
  def CheckPrereq(self):
1334
    """Check prerequisites.
1335

1336
    This checks whether the cluster is empty.
1337

1338
    Any errors are signaled by raising errors.OpPrereqError.
1339

1340
    """
1341
    master = self.cfg.GetMasterNode()
1342

    
1343
    nodelist = self.cfg.GetNodeList()
1344
    if len(nodelist) != 1 or nodelist[0] != master:
1345
      raise errors.OpPrereqError("There are still %d node(s) in"
1346
                                 " this cluster." % (len(nodelist) - 1),
1347
                                 errors.ECODE_INVAL)
1348
    instancelist = self.cfg.GetInstanceList()
1349
    if instancelist:
1350
      raise errors.OpPrereqError("There are still %d instance(s) in"
1351
                                 " this cluster." % len(instancelist),
1352
                                 errors.ECODE_INVAL)
1353

    
1354
  def Exec(self, feedback_fn):
1355
    """Destroys the cluster.
1356

1357
    """
1358
    master_params = self.cfg.GetMasterNetworkParameters()
1359

    
1360
    # Run post hooks on master node before it's removed
1361
    _RunPostHook(self, master_params.name)
1362

    
1363
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1364
                                                     master_params.ip,
1365
                                                     master_params.netmask,
1366
                                                     master_params.netdev,
1367
                                                     master_params.ip_family)
1368
    result.Raise("Could not disable the master role")
1369

    
1370
    return master_params.name
1371

    
1372

    
1373
def _VerifyCertificate(filename):
1374
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1375

1376
  @type filename: string
1377
  @param filename: Path to PEM file
1378

1379
  """
1380
  try:
1381
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1382
                                           utils.ReadFile(filename))
1383
  except Exception, err: # pylint: disable=W0703
1384
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1385
            "Failed to load X509 certificate %s: %s" % (filename, err))
1386

    
1387
  (errcode, msg) = \
1388
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1389
                                constants.SSL_CERT_EXPIRATION_ERROR)
1390

    
1391
  if msg:
1392
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1393
  else:
1394
    fnamemsg = None
1395

    
1396
  if errcode is None:
1397
    return (None, fnamemsg)
1398
  elif errcode == utils.CERT_WARNING:
1399
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1400
  elif errcode == utils.CERT_ERROR:
1401
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1402

    
1403
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1404

    
1405

    
1406
def _GetAllHypervisorParameters(cluster, instances):
1407
  """Compute the set of all hypervisor parameters.
1408

1409
  @type cluster: L{objects.Cluster}
1410
  @param cluster: the cluster object
1411
  @param instances: list of L{objects.Instance}
1412
  @param instances: additional instances from which to obtain parameters
1413
  @rtype: list of (origin, hypervisor, parameters)
1414
  @return: a list with all parameters found, indicating the hypervisor they
1415
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1416

1417
  """
1418
  hvp_data = []
1419

    
1420
  for hv_name in cluster.enabled_hypervisors:
1421
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1422

    
1423
  for os_name, os_hvp in cluster.os_hvp.items():
1424
    for hv_name, hv_params in os_hvp.items():
1425
      if hv_params:
1426
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1427
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1428

    
1429
  # TODO: collapse identical parameter values in a single one
1430
  for instance in instances:
1431
    if instance.hvparams:
1432
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1433
                       cluster.FillHV(instance)))
1434

    
1435
  return hvp_data
1436

    
1437

    
1438
class _VerifyErrors(object):
1439
  """Mix-in for cluster/group verify LUs.
1440

1441
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1442
  self.op and self._feedback_fn to be available.)
1443

1444
  """
1445

    
1446
  ETYPE_FIELD = "code"
1447
  ETYPE_ERROR = "ERROR"
1448
  ETYPE_WARNING = "WARNING"
1449

    
1450
  def _Error(self, ecode, item, msg, *args, **kwargs):
1451
    """Format an error message.
1452

1453
    Based on the opcode's error_codes parameter, either format a
1454
    parseable error code, or a simpler error string.
1455

1456
    This must be called only from Exec and functions called from Exec.
1457

1458
    """
1459
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1460
    itype, etxt, _ = ecode
1461
    # first complete the msg
1462
    if args:
1463
      msg = msg % args
1464
    # then format the whole message
1465
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1466
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1467
    else:
1468
      if item:
1469
        item = " " + item
1470
      else:
1471
        item = ""
1472
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1473
    # and finally report it via the feedback_fn
1474
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1475

    
1476
  def _ErrorIf(self, cond, ecode, *args, **kwargs):
1477
    """Log an error message if the passed condition is True.
1478

1479
    """
1480
    cond = (bool(cond)
1481
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1482

    
1483
    # If the error code is in the list of ignored errors, demote the error to a
1484
    # warning
1485
    (_, etxt, _) = ecode
1486
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1487
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1488

    
1489
    if cond:
1490
      self._Error(ecode, *args, **kwargs)
1491

    
1492
    # do not mark the operation as failed for WARN cases only
1493
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1494
      self.bad = self.bad or cond
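
# Editor's note (illustrative, not part of the original code): verification
# LUs mixing in _VerifyErrors report findings from Exec in this style (the
# condition and message below are made up, the calling convention mirrors the
# real uses further down):
#
#   self._ErrorIf(bool(problems), constants.CV_ECLUSTERCFG, None,
#                 "config problems: %s", utils.CommaJoin(problems))
#
# self.bad is set for ERROR-level findings and Exec finally returns
# "not self.bad".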
1495

    
1496

    
1497
class LUClusterVerify(NoHooksLU):
1498
  """Submits all jobs necessary to verify the cluster.
1499

1500
  """
1501
  REQ_BGL = False
1502

    
1503
  def ExpandNames(self):
1504
    self.needed_locks = {}
1505

    
1506
  def Exec(self, feedback_fn):
1507
    jobs = []
1508

    
1509
    if self.op.group_name:
1510
      groups = [self.op.group_name]
1511
      depends_fn = lambda: None
1512
    else:
1513
      groups = self.cfg.GetNodeGroupList()
1514

    
1515
      # Verify global configuration
1516
      jobs.append([
1517
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1518
        ])
1519

    
1520
      # Always depend on global verification
1521
      depends_fn = lambda: [(-len(jobs), [])]
1522

    
1523
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1524
                                            ignore_errors=self.op.ignore_errors,
1525
                                            depends=depends_fn())]
1526
                for group in groups)
1527

    
1528
    # Fix up all parameters
1529
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1530
      op.debug_simulate_errors = self.op.debug_simulate_errors
1531
      op.verbose = self.op.verbose
1532
      op.error_codes = self.op.error_codes
1533
      try:
1534
        op.skip_checks = self.op.skip_checks
1535
      except AttributeError:
1536
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1537

    
1538
    return ResultWithJobs(jobs)
1539

    
1540

    
1541


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    # dangling_nodes holds node *names*, so use them directly here
    pretty_dangling = [
        "%s (%s)" %
        (node,
         utils.CommaJoin(dangling_instances.get(node,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad
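
# Illustrative sketch (not part of the module): the dangling-node check in
# LUClusterVerifyConfig.Exec above reduces to set arithmetic over the
# configuration, e.g. with hypothetical data:
#
#   all_group_info = {"uuid-1": group}
#   nodes = {"node1": Node(group="uuid-1"), "node2": Node(group="uuid-gone")}
#   dangling = set(n.name for n in nodes.values()
#                  if n.group not in all_group_info)    # -> set(["node2"])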


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}
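
  # Illustrative sketch (not part of the module): a NodeImage starts out
  # with only configuration data and is filled in later, e.g.:
  #
  #   nimg = LUClusterVerifyGroup.NodeImage(offline=False,
  #                                         name="node1.example.com",
  #                                         vm_capable=True)
  #   # the runtime fields (volumes, instances, mfree, dfree, ...) are then
  #   # populated by the _UpdateNode* helpers once the RPC results arrive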

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)
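
  # Illustrative sketch (not part of the module): CheckPrereq above collects
  # the secondaries of mirrored instances whose primary lives in this group
  # but whose secondary lives in another one, e.g. (hypothetical config):
  #
  #   inst.disk_template = constants.DT_DRBD8      # in DTS_INT_MIRROR
  #   inst.primary_node = "node-in-this-group"
  #   inst.secondary_nodes = ["node-in-other-group"]
  #   # -> "node-in-other-group" ends up in self.extra_lv_nodes and gets an
  #   #    extra NV_LVLIST-only verify call in Exec()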

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  constants.CV_ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, constants.CV_ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, constants.CV_ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
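
  # Illustrative sketch (not part of the module): the clock-skew test above
  # only flags a node when its reported time falls outside the RPC window
  # widened by NODE_MAX_CLOCK_SKEW.  With hypothetical numbers:
  #
  #   nvinfo_starttime = 1000.0, nvinfo_endtime = 1002.0, skew = 150
  #   ntime_merged = 1100.0  -> inside [850.0, 1152.0], no error
  #   ntime_merged = 1200.0  -> above the upper bound; the reported diff is
  #                             abs(1200.0 - 1002.0) = "198.0s"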

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, constants.CV_ENODELVM, node,
                 "Invalid character ':' in PV '%s' of VG '%s'",
                 pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, constants.CV_ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, constants.CV_ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, constants.CV_ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, constants.CV_ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, constants.CV_ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, constants.CV_ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)
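
  # Illustrative sketch (not part of the module): the N+1 check above asks,
  # for every node N and every primary node P with secondaries on N, whether
  # N could absorb all of P's auto-balanced instances.  With hypothetical
  # numbers:
  #
  #   n_img.sbp = {"nodeP": ["inst1", "inst2"]}
  #   BE_MEMORY: inst1 -> 2048, inst2 -> 1024   (both auto-balanced)
  #   needed_mem = 3072
  #   n_img.mfree = 2560 -> "not enough memory to accommodate ..." is raised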

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_mc | files_vm)

    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_opt:
        # All or no nodes
        errorif(missing_file and missing_file != expected_nodes,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))
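
  # Illustrative sketch (not part of the module): _VerifyFiles above reduces
  # the per-node checksum reports to a per-file mapping and then checks three
  # things per file, e.g. with hypothetical data:
  #
  #   fileinfo["/path/to/some/file"] = {"abc...": set(["node1", "node2"]),
  #                                     "def...": set(["node3"])}
  #   # 1. missing on nodes that should have it
  #   # 2. present on nodes that should not have it (non-optional files only)
  #   # 3. more than one checksum -> "found with 2 different checksums"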

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, constants.CV_ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, constants.CV_ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, constants.CV_ENODELVM, node,
               "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                  "rpc call to node failed (instancelist): %s",
                  utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, constants.CV_ENODEHV, node,
             "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, constants.CV_ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{NodeImage})
    @param node_image: Node images
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
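
  # Illustrative sketch (not part of the module): the structure returned by
  # _CollectDiskInfo above, for a two-disk mirrored instance and a diskless
  # one (hypothetical names):
  #
  #   instdisk = {
  #     "inst1": {
  #       "nodeA": [(True, <blockdev status>), (True, <blockdev status>)],
  #       "nodeB": [(False, "node offline"), (False, "node offline")],
  #     },
  #     "diskless-inst": {},
  #   }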

  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])

  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))
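
  # Illustrative sketch (not part of the module): for a group under
  # verification with online nodes [n1, n2] and two other groups
  # {g2: [a1, a2], g3: [b1]}, the selection above yields roughly
  # (hypothetical names):
  #
  #   (["n1", "n2"],
  #    {"n1": ["a1", "b1"],
  #     "n2": ["a2", "b1"]})
  #
  # i.e. the first element covers "all nodes in their group" and the mapping
  # adds one node from every other group, cycling through that group's
  # members so the load is spread.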

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks run only in the post phase; their output is logged
    in the verify output and their failure makes the verification fail.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2700
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2701
                                                 name=node.name,
2702
                                                 vm_capable=node.vm_capable))
2703
                      for node in node_data_list)
2704

    
2705
    # Gather OOB paths
2706
    oob_paths = []
2707
    for node in self.all_node_info.values():
2708
      path = _SupportsOob(self.cfg, node)
2709
      if path and path not in oob_paths:
2710
        oob_paths.append(path)
2711

    
2712
    if oob_paths:
2713
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2714

    
2715
    for instance in self.my_inst_names:
2716
      inst_config = self.my_inst_info[instance]
2717

    
2718
      for nname in inst_config.all_nodes:
2719
        if nname not in node_image:
2720
          gnode = self.NodeImage(name=nname)
2721
          gnode.ghost = (nname not in self.all_node_info)
2722
          node_image[nname] = gnode
2723

    
2724
      inst_config.MapLVsByNode(node_vol_should)
2725

    
2726
      pnode = inst_config.primary_node
2727
      node_image[pnode].pinst.append(instance)
2728

    
2729
      for snode in inst_config.secondary_nodes:
2730
        nimg = node_image[snode]
2731
        nimg.sinst.append(instance)
2732
        if pnode not in nimg.sbp:
2733
          nimg.sbp[pnode] = []
2734
        nimg.sbp[pnode].append(instance)
2735

    
2736
    # At this point, we have the in-memory data structures complete,
2737
    # except for the runtime information, which we'll gather next
2738

    
2739
    # Due to the way our RPC system works, exact response times cannot be
2740
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2741
    # time before and after executing the request, we can at least have a time
2742
    # window.
2743
    nvinfo_starttime = time.time()
2744
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2745
                                           node_verify_param,
2746
                                           self.cfg.GetClusterName())
2747
    nvinfo_endtime = time.time()
2748

    
2749
    if self.extra_lv_nodes and vg_name is not None:
2750
      extra_lv_nvinfo = \
2751
          self.rpc.call_node_verify(self.extra_lv_nodes,
2752
                                    {constants.NV_LVLIST: vg_name},
2753
                                    self.cfg.GetClusterName())
2754
    else:
2755
      extra_lv_nvinfo = {}
2756

    
2757
    all_drbd_map = self.cfg.ComputeDRBDMap()
2758

    
2759
    feedback_fn("* Gathering disk information (%s nodes)" %
2760
                len(self.my_node_names))
2761
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2762
                                     self.my_inst_info)
2763

    
2764

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)
    feedback_fn("* Verifying instance status")
2866
    for instance in self.my_inst_names:
2867
      if verbose:
2868
        feedback_fn("* Verifying instance %s" % instance)
2869
      inst_config = self.my_inst_info[instance]
2870
      self._VerifyInstance(instance, inst_config, node_image,
2871
                           instdisk[instance])
2872
      inst_nodes_offline = []
2873

    
2874
      pnode = inst_config.primary_node
2875
      pnode_img = node_image[pnode]
2876
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2877
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
2878
               " primary node failed", instance)
2879

    
2880
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2881
               constants.CV_EINSTANCEBADNODE, instance,
2882
               "instance is marked as running and lives on offline node %s",
2883
               inst_config.primary_node)
2884

    
2885
      # If the instance is non-redundant we cannot survive losing its primary
2886
      # node, so we are not N+1 compliant. On the other hand we have no disk
2887
      # templates with more than one secondary so that situation is not well
2888
      # supported either.
2889
      # FIXME: does not support file-backed instances
2890
      if not inst_config.secondary_nodes:
2891
        i_non_redundant.append(instance)
2892

    
2893
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
2894
               constants.CV_EINSTANCELAYOUT,
2895
               instance, "instance has multiple secondary nodes: %s",
2896
               utils.CommaJoin(inst_config.secondary_nodes),
2897
               code=self.ETYPE_WARNING)
2898

    
2899
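      # For internally mirrored disk templates (e.g. DRBD), warn if the
      # instance's nodes are spread over more than one node group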
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1,
                      constants.CV_EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
                 snode, "instance %s, connection to secondary node failed",
                 instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
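        # the payload is a list of (script, status, output) tuples as
        # produced by the node's hook runner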
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
                                        for inst in self.instances.values()
                                        if inst.admin_up])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

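        # the payload maps LV names to status tuples; only the last field
        # (whether the LV is online) matters here: an offline LV that still
        # belongs to a known instance means that instance needs its disks
        # re-activated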
        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
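        # the size reported by the node is in bytes, while disk.size is
        # kept in MiB, hence the shift by 20 bits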
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params.ip,
                                                     master_params.netmask,
                                                     master_params.netdev,
                                                     master_params.ip_family)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     new_ip,
                                                     master_params.netmask,
                                                     master_params.netdev,
                                                     master_params.ip_family)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                                (netmask))


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

    """Change the parameters of the cluster.
3628

3629
    """
3630
    if self.op.vg_name is not None:
3631
      new_volume = self.op.vg_name
3632
      if not new_volume:
3633
        new_volume = None
3634
      if new_volume != self.cfg.GetVGName():
3635
        self.cfg.SetVGName(new_volume)
3636
      else:
3637
        feedback_fn("Cluster LVM configuration already in desired"
3638
                    " state, not changing")
3639
    if self.op.drbd_helper is not None:
3640
      new_helper = self.op.drbd_helper
3641
      if not new_helper:
3642
        new_helper = None
3643
      if new_helper != self.cfg.GetDRBDHelper():
3644
        self.cfg.SetDRBDHelper(new_helper)
3645
      else:
3646
        feedback_fn("Cluster DRBD helper already in desired state,"
3647
                    " not changing")
3648
    if self.op.hvparams:
3649
      self.cluster.hvparams = self.new_hvparams
3650
    if self.op.os_hvp:
3651
      self.cluster.os_hvp = self.new_os_hvp
3652
    if self.op.enabled_hypervisors is not None:
3653
      self.cluster.hvparams = self.new_hvparams
3654
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3655
    if self.op.beparams:
3656
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3657
    if self.op.nicparams:
3658
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3659
    if self.op.osparams:
3660
      self.cluster.osparams = self.new_osp
3661
    if self.op.ndparams:
3662
      self.cluster.ndparams = self.new_ndparams
3663

    
3664
    if self.op.candidate_pool_size is not None:
3665
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3666
      # we need to update the pool size here, otherwise the save will fail
3667
      _AdjustCandidatePool(self, [])
3668

    
3669
    if self.op.maintain_node_health is not None:
3670
      self.cluster.maintain_node_health = self.op.maintain_node_health
3671

    
3672
    if self.op.prealloc_wipe_disks is not None:
3673
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3674

    
3675
    if self.op.add_uids is not None:
3676
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3677

    
3678
    if self.op.remove_uids is not None:
3679
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3680

    
3681
    if self.op.uid_pool is not None:
3682
      self.cluster.uid_pool = self.op.uid_pool
3683

    
3684
    if self.op.default_iallocator is not None:
3685
      self.cluster.default_iallocator = self.op.default_iallocator
3686

    
3687
    if self.op.reserved_lvs is not None:
3688
      self.cluster.reserved_lvs = self.op.reserved_lvs
3689

    
3690
    def helper_os(aname, mods, desc):
3691
      desc += " OS list"
3692
      lst = getattr(self.cluster, aname)
3693
      for key, val in mods:
3694
        if key == constants.DDM_ADD:
3695
          if val in lst:
3696
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3697
          else:
3698
            lst.append(val)
3699
        elif key == constants.DDM_REMOVE:
3700
          if val in lst:
3701
            lst.remove(val)
3702
          else:
3703
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3704
        else:
3705
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3706

    
3707
    if self.op.hidden_os:
3708
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3709

    
3710
    if self.op.blacklisted_os:
3711
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3712

    
3713
    if self.op.master_netdev:
3714
      master_params = self.cfg.GetMasterNetworkParameters()
3715
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3716
                  self.cluster.master_netdev)
3717
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3718
                                                       master_params.ip,
3719
                                                       master_params.netmask,
3720
                                                       master_params.netdev,
3721
                                                       master_params.ip_family)
3722
      result.Raise("Could not disable the master ip")
3723
      feedback_fn("Changing master_netdev from %s to %s" %
3724
                  (master_params.netdev, self.op.master_netdev))
3725
      self.cluster.master_netdev = self.op.master_netdev
3726

    
3727
    if self.op.master_netmask:
3728
      master_params = self.cfg.GetMasterNetworkParameters()
3729
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3730
      result = self.rpc.call_node_change_master_netmask(master_params.name,
3731
                                                        master_params.netmask,
3732
                                                        self.op.master_netmask,
3733
                                                        master_params.ip,
3734
                                                        master_params.netdev)
3735
      if result.fail_msg:
3736
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
3737
        self.LogWarning(msg)
3738
        feedback_fn(msg)
3739
      else:
3740
        self.cluster.master_netmask = self.op.master_netmask
3741

    
3742
    self.cfg.Update(self.cluster, feedback_fn)
3743

    
3744
    if self.op.master_netdev:
3745
      master_params = self.cfg.GetMasterNetworkParameters()
3746
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3747
                  self.op.master_netdev)
3748
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3749
                                                     master_params.ip,
3750
                                                     master_params.netmask,
3751
                                                     master_params.netdev,
3752
                                                     master_params.ip_family)
3753
      if result.fail_msg:
3754
        self.LogWarning("Could not re-enable the master ip on"
3755
                        " the master, please restart manually: %s",
3756
                        result.fail_msg)
3757

    
3758

    
3759
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
         "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    self.rpc.call_node_activate_master_ip(master_params.name,
                                          master_params.ip,
                                          master_params.netmask,
                                          master_params.netdev,
                                          master_params.ip_family)


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    self.rpc.call_node_deactivate_master_ip(master_params.name,
                                            master_params.ip,
                                            master_params.netmask,
                                            master_params.netdev,
                                            master_params.ip_family)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

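    # wait between polls: at most 60 seconds, or less if the last reported
    # sync time estimate is shorter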
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

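  # child devices are checked with the default is_degraded test; the ldisk
  # flag is not propagated to the recursive calls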
  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
4048
  """Logical unit for OOB handling.
4049

4050
  """
4051
  REG_BGL = False
4052
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4053

    
4054
  def ExpandNames(self):
4055
    """Gather locks we need.
4056

4057
    """
4058
    if self.op.node_names:
4059
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4060
      lock_names = self.op.node_names
4061
    else:
4062
      lock_names = locking.ALL_SET
4063

    
4064
    self.needed_locks = {
4065
      locking.LEVEL_NODE: lock_names,
4066
      }
4067

    
4068
  def CheckPrereq(self):
4069
    """Check prerequisites.
4070

4071
    This checks:
4072
     - the node exists in the configuration
4073
     - OOB is supported
4074

4075
    Any errors are signaled by raising errors.OpPrereqError.
4076

4077
    """
4078
    self.nodes = []
4079
    self.master_node = self.cfg.GetMasterNode()
4080

    
4081
    assert self.op.power_delay >= 0.0
4082

    
4083
    if self.op.node_names:
4084
      if (self.op.command in self._SKIP_MASTER and
4085
          self.master_node in self.op.node_names):
4086
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4087
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4088

    
4089
        if master_oob_handler:
4090
          additional_text = ("run '%s %s %s' if you want to operate on the"
4091
                             " master regardless") % (master_oob_handler,
4092
                                                      self.op.command,
4093
                                                      self.master_node)
4094
        else:
4095
          additional_text = "it does not support out-of-band operations"
4096

    
4097
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4098
                                    " allowed for %s; %s") %
4099
                                   (self.master_node, self.op.command,
4100
                                    additional_text), errors.ECODE_INVAL)
4101
    else:
4102
      self.op.node_names = self.cfg.GetNodeList()
4103
      if self.op.command in self._SKIP_MASTER:
4104
        self.op.node_names.remove(self.master_node)
4105

    
4106
    if self.op.command in self._SKIP_MASTER:
4107
      assert self.master_node not in self.op.node_names
4108

    
4109
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4110
      if node is None:
4111
        raise errors.OpPrereqError("Node %s not found" % node_name,
4112
                                   errors.ECODE_NOENT)
4113
      else:
4114
        self.nodes.append(node)
4115

    
4116
      if (not self.op.ignore_status and
4117
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))
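
  # Summary of the payload contract enforced by _CheckPayload above, per
  # out-of-band command:
  #   OOB_HEALTH       -> list of (item, status) pairs, each status being one
  #                       of constants.OOB_STATUSES
  #   OOB_POWER_STATUS -> dict containing constants.OOB_POWER_STATUS_POWERED
  #   OOB_POWER_ON, OOB_POWER_OFF, OOB_POWER_CYCLE -> no payload (None)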


class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
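
  # Note: an OS is reported as valid only if its first entry is valid on every
  # node considered above; variants, parameters and api_versions are
  # intersected across nodes, so only values common to all per-node entries
  # are kept.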


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query the available fields for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies the given storage unit on the target node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it is powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]
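  # For example, a node with (master_candidate=False, drained=True,
  # offline=False) maps to _ROLE_DRAINED via _F2R, and _R2F maps the role back
  # to that flags tuple; _FLAGS names the attribute for each tuple position.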

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      # TODO: Use standard RPC runner, but make sure it works when the node is
      # still marked offline
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
5544
  """Query cluster configuration.
5545

5546
  """
5547
  REQ_BGL = False
5548

    
5549
  def ExpandNames(self):
5550
    self.needed_locks = {}
5551

    
5552
  def Exec(self, feedback_fn):
5553
    """Return cluster config.
5554

5555
    """
5556
    cluster = self.cfg.GetClusterInfo()
5557
    os_hvp = {}
5558

    
5559
    # Filter just for enabled hypervisors
5560
    for os_name, hv_dict in cluster.os_hvp.items():
5561
      os_hvp[os_name] = {}
5562
      for hv_name, hv_params in hv_dict.items():
5563
        if hv_name in cluster.enabled_hypervisors:
5564
          os_hvp[os_name][hv_name] = hv_params
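    # Illustrative effect (hypothetical names): with enabled_hypervisors set
    # to ["kvm"], an entry {"debian-os": {"kvm": {...}, "xen-pvm": {...}}}
    # is reduced to {"debian-os": {"kvm": {...}}} in the returned os_hvp.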
5565

    
5566
    # Convert ip_family to ip_version
5567
    primary_ip_version = constants.IP4_VERSION
5568
    if cluster.primary_ip_family == netutils.IP6Address.family:
5569
      primary_ip_version = constants.IP6_VERSION
5570

    
5571
    result = {
5572
      "software_version": constants.RELEASE_VERSION,
5573
      "protocol_version": constants.PROTOCOL_VERSION,
5574
      "config_version": constants.CONFIG_VERSION,
5575
      "os_api_version": max(constants.OS_API_VERSIONS),
5576
      "export_version": constants.EXPORT_VERSION,
5577
      "architecture": (platform.architecture()[0], platform.machine()),
5578
      "name": cluster.cluster_name,
5579
      "master": cluster.master_node,
5580
      "default_hypervisor": cluster.enabled_hypervisors[0],
5581
      "enabled_hypervisors": cluster.enabled_hypervisors,
5582
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5583
                        for hypervisor_name in cluster.enabled_hypervisors]),
5584
      "os_hvp": os_hvp,
5585
      "beparams": cluster.beparams,
5586
      "osparams": cluster.osparams,
5587
      "nicparams": cluster.nicparams,
5588
      "ndparams": cluster.ndparams,
5589
      "candidate_pool_size": cluster.candidate_pool_size,
5590
      "master_netdev": cluster.master_netdev,
5591
      "master_netmask": cluster.master_netmask,
5592
      "volume_group_name": cluster.volume_group_name,
5593
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5594
      "file_storage_dir": cluster.file_storage_dir,
5595
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5596
      "maintain_node_health": cluster.maintain_node_health,
5597
      "ctime": cluster.ctime,
5598
      "mtime": cluster.mtime,
5599
      "uuid": cluster.uuid,
5600
      "tags": list(cluster.GetTags()),
5601
      "uid_pool": cluster.uid_pool,
5602
      "default_iallocator": cluster.default_iallocator,
5603
      "reserved_lvs": cluster.reserved_lvs,
5604
      "primary_ip_version": primary_ip_version,
5605
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5606
      "hidden_os": cluster.hidden_os,
5607
      "blacklisted_os": cluster.blacklisted_os,
5608
      }
5609

    
5610
    return result
5611

    
5612

    
5613
class LUClusterConfigQuery(NoHooksLU):
5614
  """Return configuration values.
5615

5616
  """
5617
  REQ_BGL = False
5618
  _FIELDS_DYNAMIC = utils.FieldSet()
5619
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5620
                                  "watcher_pause", "volume_group_name")
5621

    
5622
  def CheckArguments(self):
5623
    _CheckOutputFields(static=self._FIELDS_STATIC,
5624
                       dynamic=self._FIELDS_DYNAMIC,
5625
                       selected=self.op.output_fields)
5626

    
5627
  def ExpandNames(self):
5628
    self.needed_locks = {}
5629

    
5630
  def Exec(self, feedback_fn):
5631
    """Dump a representation of the cluster config to the standard output.
5632

5633
    """
5634
    values = []
5635
    for field in self.op.output_fields:
5636
      if field == "cluster_name":
5637
        entry = self.cfg.GetClusterName()
5638
      elif field == "master_node":
5639
        entry = self.cfg.GetMasterNode()
5640
      elif field == "drain_flag":
5641
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5642
      elif field == "watcher_pause":
5643
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5644
      elif field == "volume_group_name":
5645
        entry = self.cfg.GetVGName()
5646
      else:
5647
        raise errors.ParameterError(field)
5648
      values.append(entry)
5649
    return values
5650

    
5651

    
5652
class LUInstanceActivateDisks(NoHooksLU):
5653
  """Bring up an instance's disks.
5654

5655
  """
5656
  REQ_BGL = False
5657

    
5658
  def ExpandNames(self):
5659
    self._ExpandAndLockInstance()
5660
    self.needed_locks[locking.LEVEL_NODE] = []
5661
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5662

    
5663
  def DeclareLocks(self, level):
5664
    if level == locking.LEVEL_NODE:
5665
      self._LockInstancesNodes()
5666

    
5667
  def CheckPrereq(self):
5668
    """Check prerequisites.
5669

5670
    This checks that the instance is in the cluster.
5671

5672
    """
5673
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5674
    assert self.instance is not None, \
5675
      "Cannot retrieve locked instance %s" % self.op.instance_name
5676
    _CheckNodeOnline(self, self.instance.primary_node)
5677

    
5678
  def Exec(self, feedback_fn):
5679
    """Activate the disks.
5680

5681
    """
5682
    disks_ok, disks_info = \
5683
              _AssembleInstanceDisks(self, self.instance,
5684
                                     ignore_size=self.op.ignore_size)
5685
    if not disks_ok:
5686
      raise errors.OpExecError("Cannot activate block devices")
5687

    
5688
    return disks_info
5689

    
5690

    
5691
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5692
                           ignore_size=False):
5693
  """Prepare the block devices for an instance.
5694

5695
  This sets up the block devices on all nodes.
5696

5697
  @type lu: L{LogicalUnit}
5698
  @param lu: the logical unit on whose behalf we execute
5699
  @type instance: L{objects.Instance}
5700
  @param instance: the instance for whose disks we assemble
5701
  @type disks: list of L{objects.Disk} or None
5702
  @param disks: which disks to assemble (or all, if None)
5703
  @type ignore_secondaries: boolean
5704
  @param ignore_secondaries: if true, errors on secondary nodes
5705
      won't result in an error return from the function
5706
  @type ignore_size: boolean
5707
  @param ignore_size: if true, the current known size of the disk
5708
      will not be used during the disk activation, useful for cases
5709
      when the size is wrong
5710
  @return: False if the operation failed, otherwise a list of
5711
      (host, instance_visible_name, node_visible_name)
5712
      with the mapping from node devices to instance devices
5713

5714
  """
5715
  device_info = []
5716
  disks_ok = True
5717
  iname = instance.name
5718
  disks = _ExpandCheckDisks(instance, disks)
5719

    
5720
  # With the two passes mechanism we try to reduce the window of
5721
  # opportunity for the race condition of switching DRBD to primary
5722
  # before handshaking occurred, but we do not eliminate it
5723

    
5724
  # The proper fix would be to wait (with some limits) until the
5725
  # connection has been made and drbd transitions from WFConnection
5726
  # into any other network-connected state (Connected, SyncTarget,
5727
  # SyncSource, etc.)
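  # Concretely, the two passes below call rpc.call_blockdev_assemble() for
  # every disk on every node with is_primary=False first, and then repeat the
  # call with is_primary=True on the primary node only.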
5728

    
5729
  # 1st pass, assemble on all nodes in secondary mode
5730
  for idx, inst_disk in enumerate(disks):
5731
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5732
      if ignore_size:
5733
        node_disk = node_disk.Copy()
5734
        node_disk.UnsetSize()
5735
      lu.cfg.SetDiskID(node_disk, node)
5736
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5737
      msg = result.fail_msg
5738
      if msg:
5739
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5740
                           " (is_primary=False, pass=1): %s",
5741
                           inst_disk.iv_name, node, msg)
5742
        if not ignore_secondaries:
5743
          disks_ok = False
5744

    
5745
  # FIXME: race condition on drbd migration to primary
5746

    
5747
  # 2nd pass, do only the primary node
5748
  for idx, inst_disk in enumerate(disks):
5749
    dev_path = None
5750

    
5751
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5752
      if node != instance.primary_node:
5753
        continue
5754
      if ignore_size:
5755
        node_disk = node_disk.Copy()
5756
        node_disk.UnsetSize()
5757
      lu.cfg.SetDiskID(node_disk, node)
5758
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5759
      msg = result.fail_msg
5760
      if msg:
5761
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5762
                           " (is_primary=True, pass=2): %s",
5763
                           inst_disk.iv_name, node, msg)
5764
        disks_ok = False
5765
      else:
5766
        dev_path = result.payload
5767

    
5768
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5769

    
5770
  # leave the disks configured for the primary node
5771
  # this is a workaround that would be fixed better by
5772
  # improving the logical/physical id handling
5773
  for disk in disks:
5774
    lu.cfg.SetDiskID(disk, instance.primary_node)
5775

    
5776
  return disks_ok, device_info
5777

    
5778

    
5779
def _StartInstanceDisks(lu, instance, force):
5780
  """Start the disks of an instance.
5781

5782
  """
5783
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5784
                                           ignore_secondaries=force)
5785
  if not disks_ok:
5786
    _ShutdownInstanceDisks(lu, instance)
5787
    if force is not None and not force:
5788
      lu.proc.LogWarning("", hint="If the message above refers to a"
5789
                         " secondary node,"
5790
                         " you can retry the operation using '--force'.")
5791
    raise errors.OpExecError("Disk consistency error")
5792

    
5793

    
5794
class LUInstanceDeactivateDisks(NoHooksLU):
5795
  """Shutdown an instance's disks.
5796

5797
  """
5798
  REQ_BGL = False
5799

    
5800
  def ExpandNames(self):
5801
    self._ExpandAndLockInstance()
5802
    self.needed_locks[locking.LEVEL_NODE] = []
5803
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5804

    
5805
  def DeclareLocks(self, level):
5806
    if level == locking.LEVEL_NODE:
5807
      self._LockInstancesNodes()
5808

    
5809
  def CheckPrereq(self):
5810
    """Check prerequisites.
5811

5812
    This checks that the instance is in the cluster.
5813

5814
    """
5815
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5816
    assert self.instance is not None, \
5817
      "Cannot retrieve locked instance %s" % self.op.instance_name
5818

    
5819
  def Exec(self, feedback_fn):
5820
    """Deactivate the disks
5821

5822
    """
5823
    instance = self.instance
5824
    if self.op.force:
5825
      _ShutdownInstanceDisks(self, instance)
5826
    else:
5827
      _SafeShutdownInstanceDisks(self, instance)
5828

    
5829

    
5830
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5831
  """Shutdown block devices of an instance.
5832

5833
  This function checks if an instance is running, before calling
5834
  _ShutdownInstanceDisks.
5835

5836
  """
5837
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5838
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5839

    
5840

    
5841
def _ExpandCheckDisks(instance, disks):
5842
  """Return the instance disks selected by the disks list
5843

5844
  @type disks: list of L{objects.Disk} or None
5845
  @param disks: selected disks
5846
  @rtype: list of L{objects.Disk}
5847
  @return: selected instance disks to act on
5848

5849
  """
5850
  if disks is None:
5851
    return instance.disks
5852
  else:
5853
    if not set(disks).issubset(instance.disks):
5854
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5855
                                   " target instance")
5856
    return disks
5857

    
5858

    
5859
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5860
  """Shutdown block devices of an instance.
5861

5862
  This does the shutdown on all nodes of the instance.
5863

5864
  If ignore_primary is true, errors on the primary node are
5865
  ignored.
5866

5867
  """
5868
  all_result = True
5869
  disks = _ExpandCheckDisks(instance, disks)
5870

    
5871
  for disk in disks:
5872
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5873
      lu.cfg.SetDiskID(top_disk, node)
5874
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5875
      msg = result.fail_msg
5876
      if msg:
5877
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5878
                      disk.iv_name, node, msg)
5879
        if ((node == instance.primary_node and not ignore_primary) or
5880
            (node != instance.primary_node and not result.offline)):
5881
          all_result = False
5882
  return all_result
5883

    
5884

    
5885
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5886
  """Checks if a node has enough free memory.
5887

5888
  This function checks if a given node has the needed amount of free
5889
  memory. In case the node has less memory or we cannot get the
5890
  information from the node, this function raises an OpPrereqError
5891
  exception.
5892

5893
  @type lu: C{LogicalUnit}
5894
  @param lu: a logical unit from which we get configuration data
5895
  @type node: C{str}
5896
  @param node: the node to check
5897
  @type reason: C{str}
5898
  @param reason: string to use in the error message
5899
  @type requested: C{int}
5900
  @param requested: the amount of memory in MiB to check for
5901
  @type hypervisor_name: C{str}
5902
  @param hypervisor_name: the hypervisor to ask for memory stats
5903
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5904
      we cannot check the node
5905

5906
  """
5907
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5908
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5909
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5910
  free_mem = nodeinfo[node].payload.get("memory_free", None)
5911
  if not isinstance(free_mem, int):
5912
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5913
                               " was '%s'" % (node, free_mem),
5914
                               errors.ECODE_ENVIRON)
5915
  if requested > free_mem:
5916
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5917
                               " needed %s MiB, available %s MiB" %
5918
                               (node, reason, requested, free_mem),
5919
                               errors.ECODE_NORES)
5920

    
5921

    
5922
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5923
  """Checks if nodes have enough free disk space in the all VGs.
5924

5925
  This function checks if all given nodes have the needed amount of
5926
  free disk. In case any node has less disk or we cannot get the
5927
  information from the node, this function raises an OpPrereqError
5928
  exception.
5929

5930
  @type lu: C{LogicalUnit}
5931
  @param lu: a logical unit from which we get configuration data
5932
  @type nodenames: C{list}
5933
  @param nodenames: the list of node names to check
5934
  @type req_sizes: C{dict}
5935
  @param req_sizes: the hash of vg and corresponding amount of disk in
5936
      MiB to check for
5937
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5938
      or we cannot check the node
5939

5940
  """
5941
  for vg, req_size in req_sizes.items():
5942
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5943

    
5944

    
5945
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5946
  """Checks if nodes have enough free disk space in the specified VG.
5947

5948
  This function checks if all given nodes have the needed amount of
5949
  free disk. In case any node has less disk or we cannot get the
5950
  information from the node, this function raises an OpPrereqError
5951
  exception.
5952

5953
  @type lu: C{LogicalUnit}
5954
  @param lu: a logical unit from which we get configuration data
5955
  @type nodenames: C{list}
5956
  @param nodenames: the list of node names to check
5957
  @type vg: C{str}
5958
  @param vg: the volume group to check
5959
  @type requested: C{int}
5960
  @param requested: the amount of disk in MiB to check for
5961
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5962
      or we cannot check the node
5963

5964
  """
5965
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5966
  for node in nodenames:
5967
    info = nodeinfo[node]
5968
    info.Raise("Cannot get current information from node %s" % node,
5969
               prereq=True, ecode=errors.ECODE_ENVIRON)
5970
    vg_free = info.payload.get("vg_free", None)
5971
    if not isinstance(vg_free, int):
5972
      raise errors.OpPrereqError("Can't compute free disk space on node"
5973
                                 " %s for vg %s, result was '%s'" %
5974
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
5975
    if requested > vg_free:
5976
      raise errors.OpPrereqError("Not enough disk space on target node %s"
5977
                                 " vg %s: required %d MiB, available %d MiB" %
5978
                                 (node, vg, requested, vg_free),
5979
                                 errors.ECODE_NORES)
5980

    
5981

    
5982
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
5983
  """Checks if nodes have enough physical CPUs
5984

5985
  This function checks if all given nodes have the needed number of
5986
  physical CPUs. In case any node has fewer CPUs or we cannot get the
5987
  information from the node, this function raises an OpPrereqError
5988
  exception.
5989

5990
  @type lu: C{LogicalUnit}
5991
  @param lu: a logical unit from which we get configuration data
5992
  @type nodenames: C{list}
5993
  @param nodenames: the list of node names to check
5994
  @type requested: C{int}
5995
  @param requested: the minimum acceptable number of physical CPUs
5996
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
5997
      or we cannot check the node
5998

5999
  """
6000
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
6001
  for node in nodenames:
6002
    info = nodeinfo[node]
6003
    info.Raise("Cannot get current information from node %s" % node,
6004
               prereq=True, ecode=errors.ECODE_ENVIRON)
6005
    num_cpus = info.payload.get("cpu_total", None)
6006
    if not isinstance(num_cpus, int):
6007
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6008
                                 " on node %s, result was '%s'" %
6009
                                 (node, num_cpus), errors.ECODE_ENVIRON)
6010
    if requested > num_cpus:
6011
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6012
                                 "required" % (node, num_cpus, requested),
6013
                                 errors.ECODE_NORES)
6014

    
6015

    
6016
class LUInstanceStartup(LogicalUnit):
6017
  """Starts an instance.
6018

6019
  """
6020
  HPATH = "instance-start"
6021
  HTYPE = constants.HTYPE_INSTANCE
6022
  REQ_BGL = False
6023

    
6024
  def CheckArguments(self):
6025
    # extra beparams
6026
    if self.op.beparams:
6027
      # fill the beparams dict
6028
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6029

    
6030
  def ExpandNames(self):
6031
    self._ExpandAndLockInstance()
6032

    
6033
  def BuildHooksEnv(self):
6034
    """Build hooks env.
6035

6036
    This runs on master, primary and secondary nodes of the instance.
6037

6038
    """
6039
    env = {
6040
      "FORCE": self.op.force,
6041
      }
6042

    
6043
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6044

    
6045
    return env
6046

    
6047
  def BuildHooksNodes(self):
6048
    """Build hooks nodes.
6049

6050
    """
6051
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6052
    return (nl, nl)
6053

    
6054
  def CheckPrereq(self):
6055
    """Check prerequisites.
6056

6057
    This checks that the instance is in the cluster.
6058

6059
    """
6060
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6061
    assert self.instance is not None, \
6062
      "Cannot retrieve locked instance %s" % self.op.instance_name
6063

    
6064
    # extra hvparams
6065
    if self.op.hvparams:
6066
      # check hypervisor parameter syntax (locally)
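      # (the supplied overrides are layered on top of the cluster- and
      # instance-level hvparams and the merged result is checked both locally
      # and, via _CheckHVParams, on all of the instance's nodes)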
6067
      cluster = self.cfg.GetClusterInfo()
6068
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6069
      filled_hvp = cluster.FillHV(instance)
6070
      filled_hvp.update(self.op.hvparams)
6071
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6072
      hv_type.CheckParameterSyntax(filled_hvp)
6073
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6074

    
6075
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6076

    
6077
    if self.primary_offline and self.op.ignore_offline_nodes:
6078
      self.proc.LogWarning("Ignoring offline primary node")
6079

    
6080
      if self.op.hvparams or self.op.beparams:
6081
        self.proc.LogWarning("Overridden parameters are ignored")
6082
    else:
6083
      _CheckNodeOnline(self, instance.primary_node)
6084

    
6085
      bep = self.cfg.GetClusterInfo().FillBE(instance)
6086

    
6087
      # check bridges existence
6088
      _CheckInstanceBridgesExist(self, instance)
6089

    
6090
      remote_info = self.rpc.call_instance_info(instance.primary_node,
6091
                                                instance.name,
6092
                                                instance.hypervisor)
6093
      remote_info.Raise("Error checking node %s" % instance.primary_node,
6094
                        prereq=True, ecode=errors.ECODE_ENVIRON)
6095
      if not remote_info.payload: # not running already
6096
        _CheckNodeFreeMemory(self, instance.primary_node,
6097
                             "starting instance %s" % instance.name,
6098
                             bep[constants.BE_MEMORY], instance.hypervisor)
6099

    
6100
  def Exec(self, feedback_fn):
6101
    """Start the instance.
6102

6103
    """
6104
    instance = self.instance
6105
    force = self.op.force
6106

    
6107
    if not self.op.no_remember:
6108
      self.cfg.MarkInstanceUp(instance.name)
6109

    
6110
    if self.primary_offline:
6111
      assert self.op.ignore_offline_nodes
6112
      self.proc.LogInfo("Primary node offline, marked instance as started")
6113
    else:
6114
      node_current = instance.primary_node
6115

    
6116
      _StartInstanceDisks(self, instance, force)
6117

    
6118
      result = \
6119
        self.rpc.call_instance_start(node_current,
6120
                                     (instance, self.op.hvparams,
6121
                                      self.op.beparams),
6122
                                     self.op.startup_paused)
6123
      msg = result.fail_msg
6124
      if msg:
6125
        _ShutdownInstanceDisks(self, instance)
6126
        raise errors.OpExecError("Could not start instance: %s" % msg)
6127

    
6128

    
6129
class LUInstanceReboot(LogicalUnit):
6130
  """Reboot an instance.
6131

6132
  """
6133
  HPATH = "instance-reboot"
6134
  HTYPE = constants.HTYPE_INSTANCE
6135
  REQ_BGL = False
6136

    
6137
  def ExpandNames(self):
6138
    self._ExpandAndLockInstance()
6139

    
6140
  def BuildHooksEnv(self):
6141
    """Build hooks env.
6142

6143
    This runs on master, primary and secondary nodes of the instance.
6144

6145
    """
6146
    env = {
6147
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6148
      "REBOOT_TYPE": self.op.reboot_type,
6149
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6150
      }
6151

    
6152
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6153

    
6154
    return env
6155

    
6156
  def BuildHooksNodes(self):
6157
    """Build hooks nodes.
6158

6159
    """
6160
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6161
    return (nl, nl)
6162

    
6163
  def CheckPrereq(self):
6164
    """Check prerequisites.
6165

6166
    This checks that the instance is in the cluster.
6167

6168
    """
6169
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6170
    assert self.instance is not None, \
6171
      "Cannot retrieve locked instance %s" % self.op.instance_name
6172

    
6173
    _CheckNodeOnline(self, instance.primary_node)
6174

    
6175
    # check bridges existence
6176
    _CheckInstanceBridgesExist(self, instance)
6177

    
6178
  def Exec(self, feedback_fn):
6179
    """Reboot the instance.
6180

6181
    """
6182
    instance = self.instance
6183
    ignore_secondaries = self.op.ignore_secondaries
6184
    reboot_type = self.op.reboot_type
6185

    
6186
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6187
                                              instance.name,
6188
                                              instance.hypervisor)
6189
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6190
    instance_running = bool(remote_info.payload)
6191

    
6192
    node_current = instance.primary_node
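    # A soft or hard reboot of a running instance is delegated to a single
    # call_instance_reboot RPC on the primary node; any other case (full
    # reboot, or the instance not running) goes through the explicit
    # stop-and-start sequence below.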
6193

    
6194
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6195
                                            constants.INSTANCE_REBOOT_HARD]:
6196
      for disk in instance.disks:
6197
        self.cfg.SetDiskID(disk, node_current)
6198
      result = self.rpc.call_instance_reboot(node_current, instance,
6199
                                             reboot_type,
6200
                                             self.op.shutdown_timeout)
6201
      result.Raise("Could not reboot instance")
6202
    else:
6203
      if instance_running:
6204
        result = self.rpc.call_instance_shutdown(node_current, instance,
6205
                                                 self.op.shutdown_timeout)
6206
        result.Raise("Could not shutdown instance for full reboot")
6207
        _ShutdownInstanceDisks(self, instance)
6208
      else:
6209
        self.LogInfo("Instance %s was already stopped, starting now",
6210
                     instance.name)
6211
      _StartInstanceDisks(self, instance, ignore_secondaries)
6212
      result = self.rpc.call_instance_start(node_current,
6213
                                            (instance, None, None), False)
6214
      msg = result.fail_msg
6215
      if msg:
6216
        _ShutdownInstanceDisks(self, instance)
6217
        raise errors.OpExecError("Could not start instance for"
6218
                                 " full reboot: %s" % msg)
6219

    
6220
    self.cfg.MarkInstanceUp(instance.name)
6221

    
6222

    
6223
class LUInstanceShutdown(LogicalUnit):
6224
  """Shutdown an instance.
6225

6226
  """
6227
  HPATH = "instance-stop"
6228
  HTYPE = constants.HTYPE_INSTANCE
6229
  REQ_BGL = False
6230

    
6231
  def ExpandNames(self):
6232
    self._ExpandAndLockInstance()
6233

    
6234
  def BuildHooksEnv(self):
6235
    """Build hooks env.
6236

6237
    This runs on master, primary and secondary nodes of the instance.
6238

6239
    """
6240
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6241
    env["TIMEOUT"] = self.op.timeout
6242
    return env
6243

    
6244
  def BuildHooksNodes(self):
6245
    """Build hooks nodes.
6246

6247
    """
6248
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6249
    return (nl, nl)
6250

    
6251
  def CheckPrereq(self):
6252
    """Check prerequisites.
6253

6254
    This checks that the instance is in the cluster.
6255

6256
    """
6257
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6258
    assert self.instance is not None, \
6259
      "Cannot retrieve locked instance %s" % self.op.instance_name
6260

    
6261
    self.primary_offline = \
6262
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6263

    
6264
    if self.primary_offline and self.op.ignore_offline_nodes:
6265
      self.proc.LogWarning("Ignoring offline primary node")
6266
    else:
6267
      _CheckNodeOnline(self, self.instance.primary_node)
6268

    
6269
  def Exec(self, feedback_fn):
6270
    """Shutdown the instance.
6271

6272
    """
6273
    instance = self.instance
6274
    node_current = instance.primary_node
6275
    timeout = self.op.timeout
6276

    
6277
    if not self.op.no_remember:
6278
      self.cfg.MarkInstanceDown(instance.name)
6279

    
6280
    if self.primary_offline:
6281
      assert self.op.ignore_offline_nodes
6282
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6283
    else:
6284
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6285
      msg = result.fail_msg
6286
      if msg:
6287
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6288

    
6289
      _ShutdownInstanceDisks(self, instance)
6290

    
6291

    
6292
class LUInstanceReinstall(LogicalUnit):
6293
  """Reinstall an instance.
6294

6295
  """
6296
  HPATH = "instance-reinstall"
6297
  HTYPE = constants.HTYPE_INSTANCE
6298
  REQ_BGL = False
6299

    
6300
  def ExpandNames(self):
6301
    self._ExpandAndLockInstance()
6302

    
6303
  def BuildHooksEnv(self):
6304
    """Build hooks env.
6305

6306
    This runs on master, primary and secondary nodes of the instance.
6307

6308
    """
6309
    return _BuildInstanceHookEnvByObject(self, self.instance)
6310

    
6311
  def BuildHooksNodes(self):
6312
    """Build hooks nodes.
6313

6314
    """
6315
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6316
    return (nl, nl)
6317

    
6318
  def CheckPrereq(self):
6319
    """Check prerequisites.
6320

6321
    This checks that the instance is in the cluster and is not running.
6322

6323
    """
6324
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6325
    assert instance is not None, \
6326
      "Cannot retrieve locked instance %s" % self.op.instance_name
6327
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6328
                     " offline, cannot reinstall")
6329
    for node in instance.secondary_nodes:
6330
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6331
                       " cannot reinstall")
6332

    
6333
    if instance.disk_template == constants.DT_DISKLESS:
6334
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6335
                                 self.op.instance_name,
6336
                                 errors.ECODE_INVAL)
6337
    _CheckInstanceDown(self, instance, "cannot reinstall")
6338

    
6339
    if self.op.os_type is not None:
6340
      # OS verification
6341
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6342
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6343
      instance_os = self.op.os_type
6344
    else:
6345
      instance_os = instance.os
6346

    
6347
    nodelist = list(instance.all_nodes)
6348

    
6349
    if self.op.osparams:
6350
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6351
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6352
      self.os_inst = i_osdict # the new dict (without defaults)
6353
    else:
6354
      self.os_inst = None
6355

    
6356
    self.instance = instance
6357

    
6358
  def Exec(self, feedback_fn):
6359
    """Reinstall the instance.
6360

6361
    """
6362
    inst = self.instance
6363

    
6364
    if self.op.os_type is not None:
6365
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6366
      inst.os = self.op.os_type
6367
      # Write to configuration
6368
      self.cfg.Update(inst, feedback_fn)
6369

    
6370
    _StartInstanceDisks(self, inst, None)
6371
    try:
6372
      feedback_fn("Running the instance OS create scripts...")
6373
      # FIXME: pass debug option from opcode to backend
6374
      result = self.rpc.call_instance_os_add(inst.primary_node,
6375
                                             (inst, self.os_inst), True,
6376
                                             self.op.debug_level)
6377
      result.Raise("Could not install OS for instance %s on node %s" %
6378
                   (inst.name, inst.primary_node))
6379
    finally:
6380
      _ShutdownInstanceDisks(self, inst)
6381

    
6382

    
6383
class LUInstanceRecreateDisks(LogicalUnit):
6384
  """Recreate an instance's missing disks.
6385

6386
  """
6387
  HPATH = "instance-recreate-disks"
6388
  HTYPE = constants.HTYPE_INSTANCE
6389
  REQ_BGL = False
6390

    
6391
  def CheckArguments(self):
6392
    # normalise the disk list
6393
    self.op.disks = sorted(frozenset(self.op.disks))
6394

    
6395
  def ExpandNames(self):
6396
    self._ExpandAndLockInstance()
6397
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6398
    if self.op.nodes:
6399
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6400
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6401
    else:
6402
      self.needed_locks[locking.LEVEL_NODE] = []
6403

    
6404
  def DeclareLocks(self, level):
6405
    if level == locking.LEVEL_NODE:
6406
      # if we replace the nodes, we only need to lock the old primary,
6407
      # otherwise we need to lock all nodes for disk re-creation
6408
      primary_only = bool(self.op.nodes)
6409
      self._LockInstancesNodes(primary_only=primary_only)
6410

    
6411
  def BuildHooksEnv(self):
6412
    """Build hooks env.
6413

6414
    This runs on master, primary and secondary nodes of the instance.
6415

6416
    """
6417
    return _BuildInstanceHookEnvByObject(self, self.instance)
6418

    
6419
  def BuildHooksNodes(self):
6420
    """Build hooks nodes.
6421

6422
    """
6423
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6424
    return (nl, nl)
6425

    
6426
  def CheckPrereq(self):
6427
    """Check prerequisites.
6428

6429
    This checks that the instance is in the cluster and is not running.
6430

6431
    """
6432
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6433
    assert instance is not None, \
6434
      "Cannot retrieve locked instance %s" % self.op.instance_name
6435
    if self.op.nodes:
6436
      if len(self.op.nodes) != len(instance.all_nodes):
6437
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6438
                                   " %d replacement nodes were specified" %
6439
                                   (instance.name, len(instance.all_nodes),
6440
                                    len(self.op.nodes)),
6441
                                   errors.ECODE_INVAL)
6442
      assert instance.disk_template != constants.DT_DRBD8 or \
6443
          len(self.op.nodes) == 2
6444
      assert instance.disk_template != constants.DT_PLAIN or \
6445
          len(self.op.nodes) == 1
6446
      primary_node = self.op.nodes[0]
6447
    else:
6448
      primary_node = instance.primary_node
6449
    _CheckNodeOnline(self, primary_node)
6450

    
6451
    if instance.disk_template == constants.DT_DISKLESS:
6452
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6453
                                 self.op.instance_name, errors.ECODE_INVAL)
6454
    # if we replace nodes *and* the old primary is offline, we don't
6455
    # check
6456
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6457
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6458
    if not (self.op.nodes and old_pnode.offline):
6459
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6460

    
6461
    if not self.op.disks:
6462
      self.op.disks = range(len(instance.disks))
6463
    else:
6464
      for idx in self.op.disks:
6465
        if idx >= len(instance.disks):
6466
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6467
                                     errors.ECODE_INVAL)
6468
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6469
      raise errors.OpPrereqError("Can't recreate disks partially and"
6470
                                 " change the nodes at the same time",
6471
                                 errors.ECODE_INVAL)
6472
    self.instance = instance
6473

    
6474
  def Exec(self, feedback_fn):
6475
    """Recreate the disks.
6476

6477
    """
6478
    instance = self.instance
6479

    
6480
    to_skip = []
6481
    mods = [] # keeps track of needed logical_id changes
6482

    
6483
    for idx, disk in enumerate(instance.disks):
6484
      if idx not in self.op.disks: # disk idx has not been passed in
6485
        to_skip.append(idx)
6486
        continue
6487
      # update secondaries for disks, if needed
6488
      if self.op.nodes:
6489
        if disk.dev_type == constants.LD_DRBD8:
6490
          # need to update the nodes and minors
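          # (the DRBD8 logical_id is the 6-tuple (node_a, node_b, port,
          # minor_a, minor_b, secret); only the nodes and minors are replaced
          # here, the port and the shared secret are kept)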
6491
          assert len(self.op.nodes) == 2
6492
          assert len(disk.logical_id) == 6 # otherwise disk internals
6493
                                           # have changed
6494
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6495
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6496
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6497
                    new_minors[0], new_minors[1], old_secret)
6498
          assert len(disk.logical_id) == len(new_id)
6499
          mods.append((idx, new_id))
6500

    
6501
    # now that we have passed all asserts above, we can apply the mods
6502
    # in a single run (to avoid partial changes)
6503
    for idx, new_id in mods:
6504
      instance.disks[idx].logical_id = new_id
6505

    
6506
    # change primary node, if needed
6507
    if self.op.nodes:
6508
      instance.primary_node = self.op.nodes[0]
6509
      self.LogWarning("Changing the instance's nodes, you will have to"
6510
                      " remove any disks left on the older nodes manually")
6511

    
6512
    if self.op.nodes:
6513
      self.cfg.Update(instance, feedback_fn)
6514

    
6515
    _CreateDisks(self, instance, to_skip=to_skip)
6516

    
6517

    
6518
class LUInstanceRename(LogicalUnit):
6519
  """Rename an instance.
6520

6521
  """
6522
  HPATH = "instance-rename"
6523
  HTYPE = constants.HTYPE_INSTANCE
6524

    
6525
  def CheckArguments(self):
6526
    """Check arguments.
6527

6528
    """
6529
    if self.op.ip_check and not self.op.name_check:
6530
      # TODO: make the ip check more flexible and not depend on the name check
6531
      raise errors.OpPrereqError("IP address check requires a name check",
6532
                                 errors.ECODE_INVAL)
6533

    
6534
  def BuildHooksEnv(self):
6535
    """Build hooks env.
6536

6537
    This runs on master, primary and secondary nodes of the instance.
6538

6539
    """
6540
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6541
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6542
    return env
6543

    
6544
  def BuildHooksNodes(self):
6545
    """Build hooks nodes.
6546

6547
    """
6548
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6549
    return (nl, nl)
6550

    
6551
  def CheckPrereq(self):
6552
    """Check prerequisites.
6553

6554
    This checks that the instance is in the cluster and is not running.
6555

6556
    """
6557
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6558
                                                self.op.instance_name)
6559
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6560
    assert instance is not None
6561
    _CheckNodeOnline(self, instance.primary_node)
6562
    _CheckInstanceDown(self, instance, "cannot rename")
6563
    self.instance = instance
6564

    
6565
    new_name = self.op.new_name
6566
    if self.op.name_check:
6567
      hostname = netutils.GetHostname(name=new_name)
6568
      if hostname != new_name:
6569
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6570
                     hostname.name)
6571
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6572
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6573
                                    " same as given hostname '%s'") %
6574
                                    (hostname.name, self.op.new_name),
6575
                                    errors.ECODE_INVAL)
6576
      new_name = self.op.new_name = hostname.name
6577
      if (self.op.ip_check and
6578
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6579
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6580
                                   (hostname.ip, new_name),
6581
                                   errors.ECODE_NOTUNIQUE)
6582

    
6583
    instance_list = self.cfg.GetInstanceList()
6584
    if new_name in instance_list and new_name != instance.name:
6585
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6586
                                 new_name, errors.ECODE_EXISTS)
6587

    
6588
  def Exec(self, feedback_fn):
6589
    """Rename the instance.
6590

6591
    """
6592
    inst = self.instance
6593
    old_name = inst.name
6594

    
6595
    rename_file_storage = False
6596
    if (inst.disk_template in constants.DTS_FILEBASED and
6597
        self.op.new_name != inst.name):
6598
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6599
      rename_file_storage = True
6600

    
6601
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6602
    # Change the instance lock. This is definitely safe while we hold the BGL.
6603
    # Otherwise the new lock would have to be added in acquired mode.
6604
    assert self.REQ_BGL
6605
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6606
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6607

    
6608
    # re-read the instance from the configuration after rename
6609
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6610

    
6611
    if rename_file_storage:
6612
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6613
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6614
                                                     old_file_storage_dir,
6615
                                                     new_file_storage_dir)
6616
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6617
                   " (but the instance has been renamed in Ganeti)" %
6618
                   (inst.primary_node, old_file_storage_dir,
6619
                    new_file_storage_dir))
6620

    
6621
    _StartInstanceDisks(self, inst, None)
6622
    try:
6623
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6624
                                                 old_name, self.op.debug_level)
6625
      msg = result.fail_msg
6626
      if msg:
6627
        msg = ("Could not run OS rename script for instance %s on node %s"
6628
               " (but the instance has been renamed in Ganeti): %s" %
6629
               (inst.name, inst.primary_node, msg))
6630
        self.proc.LogWarning(msg)
6631
    finally:
6632
      _ShutdownInstanceDisks(self, inst)
6633

    
6634
    return inst.name
6635

    
6636

    
6637
class LUInstanceRemove(LogicalUnit):
6638
  """Remove an instance.
6639

6640
  """
6641
  HPATH = "instance-remove"
6642
  HTYPE = constants.HTYPE_INSTANCE
6643
  REQ_BGL = False
6644

    
6645
  def ExpandNames(self):
6646
    self._ExpandAndLockInstance()
6647
    self.needed_locks[locking.LEVEL_NODE] = []
6648
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6649

    
6650
  def DeclareLocks(self, level):
6651
    if level == locking.LEVEL_NODE:
6652
      self._LockInstancesNodes()
6653

    
6654
  def BuildHooksEnv(self):
6655
    """Build hooks env.
6656

6657
    This runs on master, primary and secondary nodes of the instance.
6658

6659
    """
6660
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6661
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6662
    return env
6663

    
6664
  def BuildHooksNodes(self):
6665
    """Build hooks nodes.
6666

6667
    """
6668
    nl = [self.cfg.GetMasterNode()]
6669
    nl_post = list(self.instance.all_nodes) + nl
6670
    return (nl, nl_post)
6671

    
6672
  def CheckPrereq(self):
6673
    """Check prerequisites.
6674

6675
    This checks that the instance is in the cluster.
6676

6677
    """
6678
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6679
    assert self.instance is not None, \
6680
      "Cannot retrieve locked instance %s" % self.op.instance_name
6681

    
6682
  def Exec(self, feedback_fn):
6683
    """Remove the instance.
6684

6685
    """
6686
    instance = self.instance
6687
    logging.info("Shutting down instance %s on node %s",
6688
                 instance.name, instance.primary_node)
6689

    
6690
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6691
                                             self.op.shutdown_timeout)
6692
    msg = result.fail_msg
6693
    if msg:
6694
      if self.op.ignore_failures:
6695
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6696
      else:
6697
        raise errors.OpExecError("Could not shutdown instance %s on"
6698
                                 " node %s: %s" %
6699
                                 (instance.name, instance.primary_node, msg))
6700

    
6701
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6702

    
6703

    
6704
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6705
  """Utility function to remove an instance.
6706

6707
  """
6708
  logging.info("Removing block devices for instance %s", instance.name)
6709

    
6710
  if not _RemoveDisks(lu, instance):
6711
    if not ignore_failures:
6712
      raise errors.OpExecError("Can't remove instance's disks")
6713
    feedback_fn("Warning: can't remove instance's disks")
6714

    
6715
  logging.info("Removing instance %s out of cluster config", instance.name)
6716

    
6717
  lu.cfg.RemoveInstance(instance.name)
6718

    
6719
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6720
    "Instance lock removal conflict"
6721

    
6722
  # Remove lock for the instance
6723
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6724

    
6725

    
6726
class LUInstanceQuery(NoHooksLU):
6727
  """Logical unit for querying instances.
6728

6729
  """
6730
  # pylint: disable=W0142
6731
  REQ_BGL = False
6732

    
6733
  def CheckArguments(self):
6734
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6735
                             self.op.output_fields, self.op.use_locking)
6736

    
6737
  def ExpandNames(self):
6738
    self.iq.ExpandNames(self)
6739

    
6740
  def DeclareLocks(self, level):
6741
    self.iq.DeclareLocks(self, level)
6742

    
6743
  def Exec(self, feedback_fn):
6744
    return self.iq.OldStyleQuery(self)
6745

    
6746

    
6747
class LUInstanceFailover(LogicalUnit):
6748
  """Failover an instance.
6749

6750
  """
6751
  HPATH = "instance-failover"
6752
  HTYPE = constants.HTYPE_INSTANCE
6753
  REQ_BGL = False
6754

    
6755
  def CheckArguments(self):
6756
    """Check the arguments.
6757

6758
    """
6759
    self.iallocator = getattr(self.op, "iallocator", None)
6760
    self.target_node = getattr(self.op, "target_node", None)
6761

    
6762
  def ExpandNames(self):
6763
    self._ExpandAndLockInstance()
6764

    
6765
    if self.op.target_node is not None:
6766
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6767

    
6768
    self.needed_locks[locking.LEVEL_NODE] = []
6769
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6770

    
6771
    ignore_consistency = self.op.ignore_consistency
6772
    shutdown_timeout = self.op.shutdown_timeout
6773
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6774
                                       cleanup=False,
6775
                                       failover=True,
6776
                                       ignore_consistency=ignore_consistency,
6777
                                       shutdown_timeout=shutdown_timeout)
6778
    self.tasklets = [self._migrater]
6779

    
6780
  def DeclareLocks(self, level):
6781
    if level == locking.LEVEL_NODE:
6782
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6783
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6784
        if self.op.target_node is None:
6785
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6786
        else:
6787
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6788
                                                   self.op.target_node]
6789
        del self.recalculate_locks[locking.LEVEL_NODE]
6790
      else:
6791
        self._LockInstancesNodes()
6792

    
6793
  def BuildHooksEnv(self):
6794
    """Build hooks env.
6795

6796
    This runs on master, primary and secondary nodes of the instance.
6797

6798
    """
6799
    instance = self._migrater.instance
6800
    source_node = instance.primary_node
6801
    target_node = self.op.target_node
6802
    env = {
6803
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6804
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6805
      "OLD_PRIMARY": source_node,
6806
      "NEW_PRIMARY": target_node,
6807
      }
6808

    
6809
    if instance.disk_template in constants.DTS_INT_MIRROR:
6810
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6811
      env["NEW_SECONDARY"] = source_node
6812
    else:
6813
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6814

    
6815
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6816

    
6817
    return env
6818

    
6819
  def BuildHooksNodes(self):
6820
    """Build hooks nodes.
6821

6822
    """
6823
    instance = self._migrater.instance
6824
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6825
    return (nl, nl + [instance.primary_node])
6826

    
6827

    
6828
class LUInstanceMigrate(LogicalUnit):
6829
  """Migrate an instance.
6830

6831
  This is migration without shutting down, as opposed to failover,
6832
  which is done with shutdown.
6833

6834
  """
6835
  HPATH = "instance-migrate"
6836
  HTYPE = constants.HTYPE_INSTANCE
6837
  REQ_BGL = False
6838

    
6839
  def ExpandNames(self):
6840
    self._ExpandAndLockInstance()
6841

    
6842
    if self.op.target_node is not None:
6843
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6844

    
6845
    self.needed_locks[locking.LEVEL_NODE] = []
6846
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6847

    
6848
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6849
                                       cleanup=self.op.cleanup,
6850
                                       failover=False,
6851
                                       fallback=self.op.allow_failover)
6852
    self.tasklets = [self._migrater]
6853

    
6854
  def DeclareLocks(self, level):
6855
    if level == locking.LEVEL_NODE:
6856
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6857
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6858
        if self.op.target_node is None:
6859
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6860
        else:
6861
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6862
                                                   self.op.target_node]
6863
        del self.recalculate_locks[locking.LEVEL_NODE]
6864
      else:
6865
        self._LockInstancesNodes()
6866

    
6867
  def BuildHooksEnv(self):
6868
    """Build hooks env.
6869

6870
    This runs on master, primary and secondary nodes of the instance.
6871

6872
    """
6873
    instance = self._migrater.instance
6874
    source_node = instance.primary_node
6875
    target_node = self.op.target_node
6876
    env = _BuildInstanceHookEnvByObject(self, instance)
6877
    env.update({
6878
      "MIGRATE_LIVE": self._migrater.live,
6879
      "MIGRATE_CLEANUP": self.op.cleanup,
6880
      "OLD_PRIMARY": source_node,
6881
      "NEW_PRIMARY": target_node,
6882
      })
6883

    
6884
    if instance.disk_template in constants.DTS_INT_MIRROR:
6885
      env["OLD_SECONDARY"] = target_node
6886
      env["NEW_SECONDARY"] = source_node
6887
    else:
6888
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6889

    
6890
    return env
6891

    
6892
  def BuildHooksNodes(self):
6893
    """Build hooks nodes.
6894

6895
    """
6896
    instance = self._migrater.instance
6897
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6898
    return (nl, nl + [instance.primary_node])
6899

    
6900

    
6901
class LUInstanceMove(LogicalUnit):
6902
  """Move an instance by data-copying.
6903

6904
  """
6905
  HPATH = "instance-move"
6906
  HTYPE = constants.HTYPE_INSTANCE
6907
  REQ_BGL = False
6908

    
6909
  def ExpandNames(self):
6910
    self._ExpandAndLockInstance()
6911
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6912
    self.op.target_node = target_node
6913
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6914
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6915

    
6916
  def DeclareLocks(self, level):
6917
    if level == locking.LEVEL_NODE:
6918
      self._LockInstancesNodes(primary_only=True)
6919

    
6920
  def BuildHooksEnv(self):
6921
    """Build hooks env.
6922

6923
    This runs on master, primary and secondary nodes of the instance.
6924

6925
    """
6926
    env = {
6927
      "TARGET_NODE": self.op.target_node,
6928
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6929
      }
6930
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6931
    return env
6932

    
6933
  def BuildHooksNodes(self):
6934
    """Build hooks nodes.
6935

6936
    """
6937
    nl = [
6938
      self.cfg.GetMasterNode(),
6939
      self.instance.primary_node,
6940
      self.op.target_node,
6941
      ]
6942
    return (nl, nl)
6943

    
6944
  def CheckPrereq(self):
6945
    """Check prerequisites.
6946

6947
    This checks that the instance is in the cluster.
6948

6949
    """
6950
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6951
    assert self.instance is not None, \
6952
      "Cannot retrieve locked instance %s" % self.op.instance_name
6953

    
6954
    node = self.cfg.GetNodeInfo(self.op.target_node)
6955
    assert node is not None, \
6956
      "Cannot retrieve locked node %s" % self.op.target_node
6957

    
6958
    self.target_node = target_node = node.name
6959

    
6960
    if target_node == instance.primary_node:
6961
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6962
                                 (instance.name, target_node),
6963
                                 errors.ECODE_STATE)
6964

    
6965
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6966

    
6967
    for idx, dsk in enumerate(instance.disks):
6968
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6969
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6970
                                   " cannot copy" % idx, errors.ECODE_STATE)
6971

    
6972
    _CheckNodeOnline(self, target_node)
6973
    _CheckNodeNotDrained(self, target_node)
6974
    _CheckNodeVmCapable(self, target_node)
6975

    
6976
    if instance.admin_up:
6977
      # check memory requirements on the target node
6978
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6979
                           instance.name, bep[constants.BE_MEMORY],
6980
                           instance.hypervisor)
6981
    else:
6982
      self.LogInfo("Not checking memory on the secondary node as"
6983
                   " instance will not be started")
6984

    
6985
    # check bridge existence
6986
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6987

    
6988
  def Exec(self, feedback_fn):
6989
    """Move an instance.
6990

6991
    The move is done by shutting it down on its present node, copying
6992
    the data over (slow) and starting it on the new node.
6993

6994
    """
6995
    instance = self.instance
6996

    
6997
    source_node = instance.primary_node
6998
    target_node = self.target_node
6999

    
7000
    self.LogInfo("Shutting down instance %s on source node %s",
7001
                 instance.name, source_node)
7002

    
7003
    result = self.rpc.call_instance_shutdown(source_node, instance,
7004
                                             self.op.shutdown_timeout)
7005
    msg = result.fail_msg
7006
    if msg:
7007
      if self.op.ignore_consistency:
7008
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
7009
                             " Proceeding anyway. Please make sure node"
7010
                             " %s is down. Error details: %s",
7011
                             instance.name, source_node, source_node, msg)
7012
      else:
7013
        raise errors.OpExecError("Could not shutdown instance %s on"
7014
                                 " node %s: %s" %
7015
                                 (instance.name, source_node, msg))
7016

    
7017
    # create the target disks
7018
    try:
7019
      _CreateDisks(self, instance, target_node=target_node)
7020
    except errors.OpExecError:
7021
      self.LogWarning("Device creation failed, reverting...")
7022
      try:
7023
        _RemoveDisks(self, instance, target_node=target_node)
7024
      finally:
7025
        self.cfg.ReleaseDRBDMinors(instance.name)
7026
        raise
7027

    
7028
    cluster_name = self.cfg.GetClusterInfo().cluster_name
7029

    
7030
    errs = []
7031
    # activate, get path, copy the data over
7032
    for idx, disk in enumerate(instance.disks):
7033
      self.LogInfo("Copying data for disk %d", idx)
7034
      result = self.rpc.call_blockdev_assemble(target_node, disk,
7035
                                               instance.name, True, idx)
7036
      if result.fail_msg:
7037
        self.LogWarning("Can't assemble newly created disk %d: %s",
7038
                        idx, result.fail_msg)
7039
        errs.append(result.fail_msg)
7040
        break
7041
      dev_path = result.payload
7042
      result = self.rpc.call_blockdev_export(source_node, disk,
7043
                                             target_node, dev_path,
7044
                                             cluster_name)
7045
      if result.fail_msg:
7046
        self.LogWarning("Can't copy data over for disk %d: %s",
7047
                        idx, result.fail_msg)
7048
        errs.append(result.fail_msg)
7049
        break
7050

    
7051
    if errs:
7052
      self.LogWarning("Some disks failed to copy, aborting")
7053
      try:
7054
        _RemoveDisks(self, instance, target_node=target_node)
7055
      finally:
7056
        self.cfg.ReleaseDRBDMinors(instance.name)
7057
        raise errors.OpExecError("Errors during disk copy: %s" %
7058
                                 (",".join(errs),))
7059

    
7060
    instance.primary_node = target_node
7061
    self.cfg.Update(instance, feedback_fn)
7062

    
7063
    self.LogInfo("Removing the disks on the original node")
7064
    _RemoveDisks(self, instance, target_node=source_node)
7065

    
7066
    # Only start the instance if it's marked as up
7067
    if instance.admin_up:
7068
      self.LogInfo("Starting instance %s on node %s",
7069
                   instance.name, target_node)
7070

    
7071
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7072
                                           ignore_secondaries=True)
7073
      if not disks_ok:
7074
        _ShutdownInstanceDisks(self, instance)
7075
        raise errors.OpExecError("Can't activate the instance's disks")
7076

    
7077
      result = self.rpc.call_instance_start(target_node,
7078
                                            (instance, None, None), False)
7079
      msg = result.fail_msg
7080
      if msg:
7081
        _ShutdownInstanceDisks(self, instance)
7082
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7083
                                 (instance.name, target_node, msg))
7084

    
7085

    
7086
class LUNodeMigrate(LogicalUnit):
7087
  """Migrate all instances from a node.
7088

7089
  """
7090
  HPATH = "node-migrate"
7091
  HTYPE = constants.HTYPE_NODE
7092
  REQ_BGL = False
7093

    
7094
  def CheckArguments(self):
7095
    pass
7096

    
7097
  def ExpandNames(self):
7098
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7099

    
7100
    self.share_locks = _ShareAll()
7101
    self.needed_locks = {
7102
      locking.LEVEL_NODE: [self.op.node_name],
7103
      }
7104

    
7105
  def BuildHooksEnv(self):
7106
    """Build hooks env.
7107

7108
    This runs on the master.
7109

7110
    """
7111
    return {
7112
      "NODE_NAME": self.op.node_name,
7113
      }
7114

    
7115
  def BuildHooksNodes(self):
7116
    """Build hooks nodes.
7117

7118
    """
7119
    nl = [self.cfg.GetMasterNode()]
7120
    return (nl, nl)
7121

    
7122
  def CheckPrereq(self):
7123
    pass
7124

    
7125
  def Exec(self, feedback_fn):
7126
    # Prepare one migration job per primary instance on the node
7127
    jobs = [
7128
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7129
                                 mode=self.op.mode,
7130
                                 live=self.op.live,
7131
                                 iallocator=self.op.iallocator,
7132
                                 target_node=self.op.target_node)]
7133
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7134
      ]
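    # Shape of the result (instance names are hypothetical): for a node whose
    # primary instances are "inst1" and "inst2" the list above becomes
    #   [[OpInstanceMigrate(instance_name="inst1", ...)],
    #    [OpInstanceMigrate(instance_name="inst2", ...)]]
    # i.e. one single-opcode job per primary instance on the node.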
7135

    
7136
    # TODO: Run iallocator in this opcode and pass correct placement options to
7137
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7138
    # running the iallocator and the actual migration, a good consistency model
7139
    # will have to be found.
7140

    
7141
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7142
            frozenset([self.op.node_name]))
7143

    
7144
    return ResultWithJobs(jobs)
7145

    
7146

    
7147
class TLMigrateInstance(Tasklet):
7148
  """Tasklet class for instance migration.
7149

7150
  @type live: boolean
7151
  @ivar live: whether the migration will be done live or non-live;
7152
      this variable is initialized only after CheckPrereq has run
7153
  @type cleanup: boolean
7154
  @ivar cleanup: Whether we are cleaning up after a failed migration
7155
  @type iallocator: string
7156
  @ivar iallocator: The iallocator used to determine target_node
7157
  @type target_node: string
7158
  @ivar target_node: If given, the target_node to reallocate the instance to
7159
  @type failover: boolean
7160
  @ivar failover: Whether operation results in failover or migration
7161
  @type fallback: boolean
7162
  @ivar fallback: Whether fallback to failover is allowed if migration not
7163
                  possible
7164
  @type ignore_consistency: boolean
7165
  @ivar ignore_consistency: Whether we should ignore consistency between source
7166
                            and target node
7167
  @type shutdown_timeout: int
7168
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7169

7170
  """
7171

    
7172
  # Constants
7173
  _MIGRATION_POLL_INTERVAL = 1      # seconds
7174
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
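  # Construction sketch (a hedged example, not the only call site): the
  # migration/failover LUs typically create this tasklet in ExpandNames, e.g.
  #   self._migrater = TLMigrateInstance(self, self.op.instance_name,
  #                                      cleanup=self.op.cleanup)
  #   self.tasklets = [self._migrater]
  # after which the LU machinery drives CheckPrereq() and Exec() below.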
7175

    
7176
  def __init__(self, lu, instance_name, cleanup=False,
7177
               failover=False, fallback=False,
7178
               ignore_consistency=False,
7179
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7180
    """Initializes this class.
7181

7182
    """
7183
    Tasklet.__init__(self, lu)
7184

    
7185
    # Parameters
7186
    self.instance_name = instance_name
7187
    self.cleanup = cleanup
7188
    self.live = False # will be overridden later
7189
    self.failover = failover
7190
    self.fallback = fallback
7191
    self.ignore_consistency = ignore_consistency
7192
    self.shutdown_timeout = shutdown_timeout
7193

    
7194
  def CheckPrereq(self):
7195
    """Check prerequisites.
7196

7197
    This checks that the instance is in the cluster.
7198

7199
    """
7200
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7201
    instance = self.cfg.GetInstanceInfo(instance_name)
7202
    assert instance is not None
7203
    self.instance = instance
7204

    
7205
    if (not self.cleanup and not instance.admin_up and not self.failover and
7206
        self.fallback):
7207
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7208
                      " to failover")
7209
      self.failover = True
7210

    
7211
    if instance.disk_template not in constants.DTS_MIRRORED:
7212
      if self.failover:
7213
        text = "failovers"
7214
      else:
7215
        text = "migrations"
7216
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7217
                                 " %s" % (instance.disk_template, text),
7218
                                 errors.ECODE_STATE)
7219

    
7220
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7221
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7222

    
7223
      if self.lu.op.iallocator:
7224
        self._RunAllocator()
7225
      else:
7226
        # We set self.target_node as it is required by
7227
        # BuildHooksEnv
7228
        self.target_node = self.lu.op.target_node
7229

    
7230
      # self.target_node is already populated, either directly or by the
7231
      # iallocator run
7232
      target_node = self.target_node
7233
      if self.target_node == instance.primary_node:
7234
        raise errors.OpPrereqError("Cannot migrate instance %s"
7235
                                   " to its primary (%s)" %
7236
                                   (instance.name, instance.primary_node))
7237

    
7238
      if len(self.lu.tasklets) == 1:
7239
        # It is safe to release locks only when we're the only tasklet
7240
        # in the LU
7241
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7242
                      keep=[instance.primary_node, self.target_node])
7243

    
7244
    else:
7245
      secondary_nodes = instance.secondary_nodes
7246
      if not secondary_nodes:
7247
        raise errors.ConfigurationError("No secondary node but using"
7248
                                        " %s disk template" %
7249
                                        instance.disk_template)
7250
      target_node = secondary_nodes[0]
7251
      if self.lu.op.iallocator or (self.lu.op.target_node and
7252
                                   self.lu.op.target_node != target_node):
7253
        if self.failover:
7254
          text = "failed over"
7255
        else:
7256
          text = "migrated"
7257
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7258
                                   " be %s to arbitrary nodes"
7259
                                   " (neither an iallocator nor a target"
7260
                                   " node can be passed)" %
7261
                                   (instance.disk_template, text),
7262
                                   errors.ECODE_INVAL)
7263

    
7264
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7265

    
7266
    # check memory requirements on the target node
7267
    if not self.failover or instance.admin_up:
7268
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7269
                           instance.name, i_be[constants.BE_MEMORY],
7270
                           instance.hypervisor)
7271
    else:
7272
      self.lu.LogInfo("Not checking memory on the target node as"
7273
                      " instance will not be started")
7274

    
7275
    # check bridge existence
7276
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7277

    
7278
    if not self.cleanup:
7279
      _CheckNodeNotDrained(self.lu, target_node)
7280
      if not self.failover:
7281
        result = self.rpc.call_instance_migratable(instance.primary_node,
7282
                                                   instance)
7283
        if result.fail_msg and self.fallback:
7284
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7285
                          " failover")
7286
          self.failover = True
7287
        else:
7288
          result.Raise("Can't migrate, please use failover",
7289
                       prereq=True, ecode=errors.ECODE_STATE)
7290

    
7291
    assert not (self.failover and self.cleanup)
7292

    
7293
    if not self.failover:
7294
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7295
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7296
                                   " parameters is accepted",
7297
                                   errors.ECODE_INVAL)
7298
      if self.lu.op.live is not None:
7299
        if self.lu.op.live:
7300
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7301
        else:
7302
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7303
        # reset the 'live' parameter to None so that repeated
7304
        # invocations of CheckPrereq do not raise an exception
7305
        self.lu.op.live = None
7306
      elif self.lu.op.mode is None:
7307
        # read the default value from the hypervisor
7308
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7309
                                                skip_globals=False)
7310
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7311

    
7312
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7313
    else:
7314
      # Failover is never live
7315
      self.live = False
7316

    
7317
  def _RunAllocator(self):
7318
    """Run the allocator based on input opcode.
7319

7320
    """
7321
    ial = IAllocator(self.cfg, self.rpc,
7322
                     mode=constants.IALLOCATOR_MODE_RELOC,
7323
                     name=self.instance_name,
7324
                     # TODO See why hail breaks with a single node below
7325
                     relocate_from=[self.instance.primary_node,
7326
                                    self.instance.primary_node],
7327
                     )
7328

    
7329
    ial.Run(self.lu.op.iallocator)
7330

    
7331
    if not ial.success:
7332
      raise errors.OpPrereqError("Can't compute nodes using"
7333
                                 " iallocator '%s': %s" %
7334
                                 (self.lu.op.iallocator, ial.info),
7335
                                 errors.ECODE_NORES)
7336
    if len(ial.result) != ial.required_nodes:
7337
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7338
                                 " of nodes (%s), required %s" %
7339
                                 (self.lu.op.iallocator, len(ial.result),
7340
                                  ial.required_nodes), errors.ECODE_FAULT)
7341
    self.target_node = ial.result[0]
7342
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7343
                 self.instance_name, self.lu.op.iallocator,
7344
                 utils.CommaJoin(ial.result))
7345

    
7346
  def _WaitUntilSync(self):
7347
    """Poll with custom rpc for disk sync.
7348

7349
    This uses our own step-based rpc call.
7350

7351
    """
7352
    self.feedback_fn("* wait until resync is done")
7353
    all_done = False
7354
    while not all_done:
7355
      all_done = True
7356
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7357
                                            self.nodes_ip,
7358
                                            self.instance.disks)
7359
      min_percent = 100
7360
      for node, nres in result.items():
7361
        nres.Raise("Cannot resync disks on node %s" % node)
7362
        node_done, node_percent = nres.payload
7363
        all_done = all_done and node_done
7364
        if node_percent is not None:
7365
          min_percent = min(min_percent, node_percent)
7366
      if not all_done:
7367
        if min_percent < 100:
7368
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7369
        time.sleep(2)
7370

    
7371
  def _EnsureSecondary(self, node):
7372
    """Demote a node to secondary.
7373

7374
    """
7375
    self.feedback_fn("* switching node %s to secondary mode" % node)
7376

    
7377
    for dev in self.instance.disks:
7378
      self.cfg.SetDiskID(dev, node)
7379

    
7380
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7381
                                          self.instance.disks)
7382
    result.Raise("Cannot change disk to secondary on node %s" % node)
7383

    
7384
  def _GoStandalone(self):
7385
    """Disconnect from the network.
7386

7387
    """
7388
    self.feedback_fn("* changing into standalone mode")
7389
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7390
                                               self.instance.disks)
7391
    for node, nres in result.items():
7392
      nres.Raise("Cannot disconnect disks on node %s" % node)
7393

    
7394
  def _GoReconnect(self, multimaster):
7395
    """Reconnect to the network.
7396

7397
    """
7398
    if multimaster:
7399
      msg = "dual-master"
7400
    else:
7401
      msg = "single-master"
7402
    self.feedback_fn("* changing disks into %s mode" % msg)
7403
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7404
                                           self.instance.disks,
7405
                                           self.instance.name, multimaster)
7406
    for node, nres in result.items():
7407
      nres.Raise("Cannot change disks config on node %s" % node)
7408

    
7409
  def _ExecCleanup(self):
7410
    """Try to cleanup after a failed migration.
7411

7412
    The cleanup is done by:
7413
      - check that the instance is running only on one node
7414
        (and update the config if needed)
7415
      - change disks on its secondary node to secondary
7416
      - wait until disks are fully synchronized
7417
      - disconnect from the network
7418
      - change disks into single-master mode
7419
      - wait again until disks are fully synchronized
7420

7421
    """
7422
    instance = self.instance
7423
    target_node = self.target_node
7424
    source_node = self.source_node
7425

    
7426
    # check running on only one node
7427
    self.feedback_fn("* checking where the instance actually runs"
7428
                     " (if this hangs, the hypervisor might be in"
7429
                     " a bad state)")
7430
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7431
    for node, result in ins_l.items():
7432
      result.Raise("Can't contact node %s" % node)
7433

    
7434
    runningon_source = instance.name in ins_l[source_node].payload
7435
    runningon_target = instance.name in ins_l[target_node].payload
7436

    
7437
    if runningon_source and runningon_target:
7438
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7439
                               " or the hypervisor is confused; you will have"
7440
                               " to ensure manually that it runs only on one"
7441
                               " and restart this operation")
7442

    
7443
    if not (runningon_source or runningon_target):
7444
      raise errors.OpExecError("Instance does not seem to be running at all;"
7445
                               " in this case it's safer to repair by"
7446
                               " running 'gnt-instance stop' to ensure disk"
7447
                               " shutdown, and then restarting it")
7448

    
7449
    if runningon_target:
7450
      # the migration has actually succeeded, we need to update the config
7451
      self.feedback_fn("* instance running on secondary node (%s),"
7452
                       " updating config" % target_node)
7453
      instance.primary_node = target_node
7454
      self.cfg.Update(instance, self.feedback_fn)
7455
      demoted_node = source_node
7456
    else:
7457
      self.feedback_fn("* instance confirmed to be running on its"
7458
                       " primary node (%s)" % source_node)
7459
      demoted_node = target_node
7460

    
7461
    if instance.disk_template in constants.DTS_INT_MIRROR:
7462
      self._EnsureSecondary(demoted_node)
7463
      try:
7464
        self._WaitUntilSync()
7465
      except errors.OpExecError:
7466
        # we ignore errors here, since if the device is standalone, it
7467
        # won't be able to sync
7468
        pass
7469
      self._GoStandalone()
7470
      self._GoReconnect(False)
7471
      self._WaitUntilSync()
7472

    
7473
    self.feedback_fn("* done")
7474

    
7475
  def _RevertDiskStatus(self):
7476
    """Try to revert the disk status after a failed migration.
7477

7478
    """
7479
    target_node = self.target_node
7480
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7481
      return
7482

    
7483
    try:
7484
      self._EnsureSecondary(target_node)
7485
      self._GoStandalone()
7486
      self._GoReconnect(False)
7487
      self._WaitUntilSync()
7488
    except errors.OpExecError, err:
7489
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7490
                         " please try to recover the instance manually;"
7491
                         " error '%s'" % str(err))
7492

    
7493
  def _AbortMigration(self):
7494
    """Call the hypervisor code to abort a started migration.
7495

7496
    """
7497
    instance = self.instance
7498
    target_node = self.target_node
7499
    source_node = self.source_node
7500
    migration_info = self.migration_info
7501

    
7502
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7503
                                                                 instance,
7504
                                                                 migration_info,
7505
                                                                 False)
7506
    abort_msg = abort_result.fail_msg
7507
    if abort_msg:
7508
      logging.error("Aborting migration failed on target node %s: %s",
7509
                    target_node, abort_msg)
7510
      # Don't raise an exception here, as we still have to try to revert the
7511
      # disk status, even if this step failed.
7512

    
7513
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7514
        instance, False, self.live)
7515
    abort_msg = abort_result.fail_msg
7516
    if abort_msg:
7517
      logging.error("Aborting migration failed on source node %s: %s",
7518
                    source_node, abort_msg)
7519

    
7520
  def _ExecMigration(self):
7521
    """Migrate an instance.
7522

7523
    The migration is done by:
7524
      - change the disks into dual-master mode
7525
      - wait until disks are fully synchronized again
7526
      - migrate the instance
7527
      - change disks on the new secondary node (the old primary) to secondary
7528
      - wait until disks are fully synchronized
7529
      - change disks into single-master mode
7530

7531
    """
7532
    instance = self.instance
7533
    target_node = self.target_node
7534
    source_node = self.source_node
7535

    
7536
    # Check for hypervisor version mismatch and warn the user.
7537
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7538
                                       None, self.instance.hypervisor)
7539
    src_info = nodeinfo[source_node]
7540
    dst_info = nodeinfo[target_node]
7541

    
7542
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7543
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7544
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7545
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7546
      if src_version != dst_version:
7547
        self.feedback_fn("* warning: hypervisor version mismatch between"
7548
                         " source (%s) and target (%s) node" %
7549
                         (src_version, dst_version))
7550

    
7551
    self.feedback_fn("* checking disk consistency between source and target")
7552
    for dev in instance.disks:
7553
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7554
        raise errors.OpExecError("Disk %s is degraded or not fully"
7555
                                 " synchronized on target node,"
7556
                                 " aborting migration" % dev.iv_name)
7557

    
7558
    # First get the migration information from the remote node
7559
    result = self.rpc.call_migration_info(source_node, instance)
7560
    msg = result.fail_msg
7561
    if msg:
7562
      log_err = ("Failed fetching source migration information from %s: %s" %
7563
                 (source_node, msg))
7564
      logging.error(log_err)
7565
      raise errors.OpExecError(log_err)
7566

    
7567
    self.migration_info = migration_info = result.payload
7568

    
7569
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7570
      # Then switch the disks to master/master mode
7571
      self._EnsureSecondary(target_node)
7572
      self._GoStandalone()
7573
      self._GoReconnect(True)
7574
      self._WaitUntilSync()
7575

    
7576
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7577
    result = self.rpc.call_accept_instance(target_node,
7578
                                           instance,
7579
                                           migration_info,
7580
                                           self.nodes_ip[target_node])
7581

    
7582
    msg = result.fail_msg
7583
    if msg:
7584
      logging.error("Instance pre-migration failed, trying to revert"
7585
                    " disk status: %s", msg)
7586
      self.feedback_fn("Pre-migration failed, aborting")
7587
      self._AbortMigration()
7588
      self._RevertDiskStatus()
7589
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7590
                               (instance.name, msg))
7591

    
7592
    self.feedback_fn("* migrating instance to %s" % target_node)
7593
    result = self.rpc.call_instance_migrate(source_node, instance,
7594
                                            self.nodes_ip[target_node],
7595
                                            self.live)
7596
    msg = result.fail_msg
7597
    if msg:
7598
      logging.error("Instance migration failed, trying to revert"
7599
                    " disk status: %s", msg)
7600
      self.feedback_fn("Migration failed, aborting")
7601
      self._AbortMigration()
7602
      self._RevertDiskStatus()
7603
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7604
                               (instance.name, msg))
7605

    
7606
    self.feedback_fn("* starting memory transfer")
7607
    last_feedback = time.time()
7608
    while True:
7609
      result = self.rpc.call_instance_get_migration_status(source_node,
7610
                                                           instance)
7611
      msg = result.fail_msg
7612
      ms = result.payload   # MigrationStatus instance
7613
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7614
        logging.error("Instance migration failed, trying to revert"
7615
                      " disk status: %s", msg)
7616
        self.feedback_fn("Migration failed, aborting")
7617
        self._AbortMigration()
7618
        self._RevertDiskStatus()
7619
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7620
                                 (instance.name, msg))
7621

    
7622
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7623
        self.feedback_fn("* memory transfer complete")
7624
        break
7625

    
7626
      if (utils.TimeoutExpired(last_feedback,
7627
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7628
          ms.transferred_ram is not None):
7629
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
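        # e.g. (hypothetical numbers): transferred_ram=1536 MiB out of
        # total_ram=2048 MiB reports a progress of 75.00 %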
7630
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7631
        last_feedback = time.time()
7632

    
7633
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7634

    
7635
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7636
                                                           instance,
7637
                                                           True,
7638
                                                           self.live)
7639
    msg = result.fail_msg
7640
    if msg:
7641
      logging.error("Instance migration succeeded, but finalization failed"
7642
                    " on the source node: %s", msg)
7643
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7644
                               msg)
7645

    
7646
    instance.primary_node = target_node
7647

    
7648
    # distribute new instance config to the other nodes
7649
    self.cfg.Update(instance, self.feedback_fn)
7650

    
7651
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7652
                                                           instance,
7653
                                                           migration_info,
7654
                                                           True)
7655
    msg = result.fail_msg
7656
    if msg:
7657
      logging.error("Instance migration succeeded, but finalization failed"
7658
                    " on the target node: %s", msg)
7659
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7660
                               msg)
7661

    
7662
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7663
      self._EnsureSecondary(source_node)
7664
      self._WaitUntilSync()
7665
      self._GoStandalone()
7666
      self._GoReconnect(False)
7667
      self._WaitUntilSync()
7668

    
7669
    self.feedback_fn("* done")
7670

    
7671
  def _ExecFailover(self):
7672
    """Failover an instance.
7673

7674
    The failover is done by shutting it down on its present node and
7675
    starting it on the secondary.
7676

7677
    """
7678
    instance = self.instance
7679
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7680

    
7681
    source_node = instance.primary_node
7682
    target_node = self.target_node
7683

    
7684
    if instance.admin_up:
7685
      self.feedback_fn("* checking disk consistency between source and target")
7686
      for dev in instance.disks:
7687
        # for drbd, these are drbd over lvm
7688
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7689
          if primary_node.offline:
7690
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7691
                             " target node %s" %
7692
                             (primary_node.name, dev.iv_name, target_node))
7693
          elif not self.ignore_consistency:
7694
            raise errors.OpExecError("Disk %s is degraded on target node,"
7695
                                     " aborting failover" % dev.iv_name)
7696
    else:
7697
      self.feedback_fn("* not checking disk consistency as instance is not"
7698
                       " running")
7699

    
7700
    self.feedback_fn("* shutting down instance on source node")
7701
    logging.info("Shutting down instance %s on node %s",
7702
                 instance.name, source_node)
7703

    
7704
    result = self.rpc.call_instance_shutdown(source_node, instance,
7705
                                             self.shutdown_timeout)
7706
    msg = result.fail_msg
7707
    if msg:
7708
      if self.ignore_consistency or primary_node.offline:
7709
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7710
                           " proceeding anyway; please make sure node"
7711
                           " %s is down; error details: %s",
7712
                           instance.name, source_node, source_node, msg)
7713
      else:
7714
        raise errors.OpExecError("Could not shutdown instance %s on"
7715
                                 " node %s: %s" %
7716
                                 (instance.name, source_node, msg))
7717

    
7718
    self.feedback_fn("* deactivating the instance's disks on source node")
7719
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7720
      raise errors.OpExecError("Can't shut down the instance's disks")
7721

    
7722
    instance.primary_node = target_node
7723
    # distribute new instance config to the other nodes
7724
    self.cfg.Update(instance, self.feedback_fn)
7725

    
7726
    # Only start the instance if it's marked as up
7727
    if instance.admin_up:
7728
      self.feedback_fn("* activating the instance's disks on target node %s" %
7729
                       target_node)
7730
      logging.info("Starting instance %s on node %s",
7731
                   instance.name, target_node)
7732

    
7733
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7734
                                           ignore_secondaries=True)
7735
      if not disks_ok:
7736
        _ShutdownInstanceDisks(self.lu, instance)
7737
        raise errors.OpExecError("Can't activate the instance's disks")
7738

    
7739
      self.feedback_fn("* starting the instance on the target node %s" %
7740
                       target_node)
7741
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
7742
                                            False)
7743
      msg = result.fail_msg
7744
      if msg:
7745
        _ShutdownInstanceDisks(self.lu, instance)
7746
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7747
                                 (instance.name, target_node, msg))
7748

    
7749
  def Exec(self, feedback_fn):
7750
    """Perform the migration.
7751

7752
    """
7753
    self.feedback_fn = feedback_fn
7754
    self.source_node = self.instance.primary_node
7755

    
7756
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7757
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7758
      self.target_node = self.instance.secondary_nodes[0]
7759
      # Otherwise self.target_node has been populated either
7760
      # directly, or through an iallocator.
7761

    
7762
    self.all_nodes = [self.source_node, self.target_node]
7763
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7764
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7765

    
7766
    if self.failover:
7767
      feedback_fn("Failover instance %s" % self.instance.name)
7768
      self._ExecFailover()
7769
    else:
7770
      feedback_fn("Migrating instance %s" % self.instance.name)
7771

    
7772
      if self.cleanup:
7773
        return self._ExecCleanup()
7774
      else:
7775
        return self._ExecMigration()
7776

    
7777

    
7778
def _CreateBlockDev(lu, node, instance, device, force_create,
7779
                    info, force_open):
7780
  """Create a tree of block devices on a given node.
7781

7782
  If this device type has to be created on secondaries, create it and
7783
  all its children.
7784

7785
  If not, just recurse to children keeping the same 'force' value.
7786

7787
  @param lu: the lu on whose behalf we execute
7788
  @param node: the node on which to create the device
7789
  @type instance: L{objects.Instance}
7790
  @param instance: the instance which owns the device
7791
  @type device: L{objects.Disk}
7792
  @param device: the device to create
7793
  @type force_create: boolean
7794
  @param force_create: whether to force creation of this device; this
7795
      will be changed to True whenever we find a device which has
7796
      CreateOnSecondary() attribute
7797
  @param info: the extra 'metadata' we should attach to the device
7798
      (this will be represented as a LVM tag)
7799
  @type force_open: boolean
7800
  @param force_open: this parameter will be passed to the
7801
      L{backend.BlockdevCreate} function where it specifies
7802
      whether we run on primary or not, and it affects both
7803
      the child assembly and the device's own Open() execution
7804

7805
  """
7806
  if device.CreateOnSecondary():
7807
    force_create = True
7808

    
7809
  if device.children:
7810
    for child in device.children:
7811
      _CreateBlockDev(lu, node, instance, child, force_create,
7812
                      info, force_open)
7813

    
7814
  if not force_create:
7815
    return
7816

    
7817
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7818

    
7819

    
7820
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7821
  """Create a single block device on a given node.
7822

7823
  This will not recurse over children of the device, so they must be
7824
  created in advance.
7825

7826
  @param lu: the lu on whose behalf we execute
7827
  @param node: the node on which to create the device
7828
  @type instance: L{objects.Instance}
7829
  @param instance: the instance which owns the device
7830
  @type device: L{objects.Disk}
7831
  @param device: the device to create
7832
  @param info: the extra 'metadata' we should attach to the device
7833
      (this will be represented as a LVM tag)
7834
  @type force_open: boolean
7835
  @param force_open: this parameter will be passed to the
7836
      L{backend.BlockdevCreate} function where it specifies
7837
      whether we run on primary or not, and it affects both
7838
      the child assembly and the device's own Open() execution
7839

7840
  """
7841
  lu.cfg.SetDiskID(device, node)
7842
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7843
                                       instance.name, force_open, info)
7844
  result.Raise("Can't create block device %s on"
7845
               " node %s for instance %s" % (device, node, instance.name))
7846
  if device.physical_id is None:
7847
    device.physical_id = result.payload
7848

    
7849

    
7850
def _GenerateUniqueNames(lu, exts):
7851
  """Generate a suitable LV name.
7852

7853
  This will generate unique logical volume names, one per given extension.
7854

7855
  """
7856
  results = []
7857
  for val in exts:
7858
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7859
    results.append("%s%s" % (new_id, val))
7860
  return results
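  # Example of the result shape (the IDs below are made up and shortened): for
  # exts=[".disk0", ".disk1"] this returns something like
  #   ["d2ab3e5c.disk0", "7f41c9d0.disk1"]
  # i.e. one freshly generated unique ID per requested extension.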
7861

    
7862

    
7863
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7864
                         iv_name, p_minor, s_minor):
7865
  """Generate a drbd8 device complete with its children.
7866

7867
  """
7868
  assert len(vgnames) == len(names) == 2
7869
  port = lu.cfg.AllocatePort()
7870
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7871
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7872
                          logical_id=(vgnames[0], names[0]))
7873
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
7874
                          logical_id=(vgnames[1], names[1]))
7875
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7876
                          logical_id=(primary, secondary, port,
7877
                                      p_minor, s_minor,
7878
                                      shared_secret),
7879
                          children=[dev_data, dev_meta],
7880
                          iv_name=iv_name)
7881
  return drbd_dev
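  # Sketch of the resulting device tree: an LD_DRBD8 disk of size MiB whose
  # children are two LD_LV volumes, names[0] (data, size MiB on vgnames[0])
  # and names[1] (metadata, DRBD_META_SIZE MiB on vgnames[1]); its logical_id
  # bundles both node names, the allocated port, both minors and the shared
  # secret.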
7882

    
7883

    
7884
def _GenerateDiskTemplate(lu, template_name,
7885
                          instance_name, primary_node,
7886
                          secondary_nodes, disk_info,
7887
                          file_storage_dir, file_driver,
7888
                          base_index, feedback_fn):
7889
  """Generate the entire disk layout for a given template type.
7890

7891
  """
7892
  #TODO: compute space requirements
7893

    
7894
  vgname = lu.cfg.GetVGName()
7895
  disk_count = len(disk_info)
7896
  disks = []
7897
  if template_name == constants.DT_DISKLESS:
7898
    pass
7899
  elif template_name == constants.DT_PLAIN:
7900
    if len(secondary_nodes) != 0:
7901
      raise errors.ProgrammerError("Wrong template configuration")
7902

    
7903
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7904
                                      for i in range(disk_count)])
7905
    for idx, disk in enumerate(disk_info):
7906
      disk_index = idx + base_index
7907
      vg = disk.get(constants.IDISK_VG, vgname)
7908
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7909
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7910
                              size=disk[constants.IDISK_SIZE],
7911
                              logical_id=(vg, names[idx]),
7912
                              iv_name="disk/%d" % disk_index,
7913
                              mode=disk[constants.IDISK_MODE])
7914
      disks.append(disk_dev)
7915
  elif template_name == constants.DT_DRBD8:
7916
    if len(secondary_nodes) != 1:
7917
      raise errors.ProgrammerError("Wrong template configuration")
7918
    remote_node = secondary_nodes[0]
7919
    minors = lu.cfg.AllocateDRBDMinor(
7920
      [primary_node, remote_node] * len(disk_info), instance_name)
7921

    
7922
    names = []
7923
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7924
                                               for i in range(disk_count)]):
7925
      names.append(lv_prefix + "_data")
7926
      names.append(lv_prefix + "_meta")
7927
    for idx, disk in enumerate(disk_info):
7928
      disk_index = idx + base_index
7929
      data_vg = disk.get(constants.IDISK_VG, vgname)
7930
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7931
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7932
                                      disk[constants.IDISK_SIZE],
7933
                                      [data_vg, meta_vg],
7934
                                      names[idx * 2:idx * 2 + 2],
7935
                                      "disk/%d" % disk_index,
7936
                                      minors[idx * 2], minors[idx * 2 + 1])
7937
      disk_dev.mode = disk[constants.IDISK_MODE]
7938
      disks.append(disk_dev)
7939
  elif template_name == constants.DT_FILE:
7940
    if len(secondary_nodes) != 0:
7941
      raise errors.ProgrammerError("Wrong template configuration")
7942

    
7943
    opcodes.RequireFileStorage()
7944

    
7945
    for idx, disk in enumerate(disk_info):
7946
      disk_index = idx + base_index
7947
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7948
                              size=disk[constants.IDISK_SIZE],
7949
                              iv_name="disk/%d" % disk_index,
7950
                              logical_id=(file_driver,
7951
                                          "%s/disk%d" % (file_storage_dir,
7952
                                                         disk_index)),
7953
                              mode=disk[constants.IDISK_MODE])
7954
      disks.append(disk_dev)
7955
  elif template_name == constants.DT_SHARED_FILE:
7956
    if len(secondary_nodes) != 0:
7957
      raise errors.ProgrammerError("Wrong template configuration")
7958

    
7959
    opcodes.RequireSharedFileStorage()
7960

    
7961
    for idx, disk in enumerate(disk_info):
7962
      disk_index = idx + base_index
7963
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7964
                              size=disk[constants.IDISK_SIZE],
7965
                              iv_name="disk/%d" % disk_index,
7966
                              logical_id=(file_driver,
7967
                                          "%s/disk%d" % (file_storage_dir,
7968
                                                         disk_index)),
7969
                              mode=disk[constants.IDISK_MODE])
7970
      disks.append(disk_dev)
7971
  elif template_name == constants.DT_BLOCK:
7972
    if len(secondary_nodes) != 0:
7973
      raise errors.ProgrammerError("Wrong template configuration")
7974

    
7975
    for idx, disk in enumerate(disk_info):
7976
      disk_index = idx + base_index
7977
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7978
                              size=disk[constants.IDISK_SIZE],
7979
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7980
                                          disk[constants.IDISK_ADOPT]),
7981
                              iv_name="disk/%d" % disk_index,
7982
                              mode=disk[constants.IDISK_MODE])
7983
      disks.append(disk_dev)
7984

    
7985
  else:
7986
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7987
  return disks
7988

    
7989

    
7990
def _GetInstanceInfoText(instance):
7991
  """Compute the text that should be added to the disk's metadata.
7992

7993
  """
7994
  return "originstname+%s" % instance.name
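  # e.g. for an instance named "instance1.example.com" (hypothetical), every
  # disk gets tagged with "originstname+instance1.example.com"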
7995

    
7996

    
7997
def _CalcEta(time_taken, written, total_size):
7998
  """Calculates the ETA based on size written and total size.
7999

8000
  @param time_taken: The time taken so far
8001
  @param written: amount written so far
8002
  @param total_size: The total size of data to be written
8003
  @return: The remaining time in seconds
8004

8005
  """
8006
  avg_time = time_taken / float(written)
8007
  return (total_size - written) * avg_time
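  # Worked example (hypothetical numbers): 30 s spent writing 512 MiB of a
  # 2048 MiB disk gives avg_time = 30 / 512.0 s/MiB, hence an ETA of
  # (2048 - 512) * 30 / 512.0 = 90 s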
8008

    
8009

    
8010
def _WipeDisks(lu, instance):
8011
  """Wipes instance disks.
8012

8013
  @type lu: L{LogicalUnit}
8014
  @param lu: the logical unit on whose behalf we execute
8015
  @type instance: L{objects.Instance}
8016
  @param instance: the instance whose disks we should wipe
8017
  @return: the success of the wipe
8018

8019
  """
8020
  node = instance.primary_node
8021

    
8022
  for device in instance.disks:
8023
    lu.cfg.SetDiskID(device, node)
8024

    
8025
  logging.info("Pause sync of instance %s disks", instance.name)
8026
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8027

    
8028
  for idx, success in enumerate(result.payload):
8029
    if not success:
8030
      logging.warn("pause-sync of instance %s for disk %d failed",
8031
                   instance.name, idx)
8032

    
8033
  try:
8034
    for idx, device in enumerate(instance.disks):
8035
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8036
      # MAX_WIPE_CHUNK at max
8037
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8038
                            constants.MIN_WIPE_CHUNK_PERCENT)
8039
      # we _must_ make this an int, otherwise rounding errors will
8040
      # occur
8041
      wipe_chunk_size = int(wipe_chunk_size)
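      # Example (the constant values here are assumptions, not authoritative):
      # with MIN_WIPE_CHUNK_PERCENT=10 and MAX_WIPE_CHUNK=1024 MiB, a 100 GiB
      # (102400 MiB) disk would ask for 10240 MiB per chunk and is therefore
      # capped at 1024 MiB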
8042

    
8043
      lu.LogInfo("* Wiping disk %d", idx)
8044
      logging.info("Wiping disk %d for instance %s, node %s using"
8045
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8046

    
8047
      offset = 0
8048
      size = device.size
8049
      last_output = 0
8050
      start_time = time.time()
8051

    
8052
      while offset < size:
8053
        wipe_size = min(wipe_chunk_size, size - offset)
8054
        logging.debug("Wiping disk %d, offset %s, chunk %s",
8055
                      idx, offset, wipe_size)
8056
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8057
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
8058
                     (idx, offset, wipe_size))
8059
        now = time.time()
8060
        offset += wipe_size
8061
        if now - last_output >= 60:
8062
          eta = _CalcEta(now - start_time, offset, size)
8063
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
8064
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
8065
          last_output = now
8066
  finally:
8067
    logging.info("Resume sync of instance %s disks", instance.name)
8068

    
8069
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8070

    
8071
    for idx, success in enumerate(result.payload):
8072
      if not success:
8073
        lu.LogWarning("Resume sync of disk %d failed, please have a"
8074
                      " look at the status and troubleshoot the issue", idx)
8075
        logging.warn("resume-sync of instance %s for disk %d failed",
8076
                     instance.name, idx)
8077

    
8078

    
8079
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8080
  """Create all disks for an instance.
8081

8082
  This abstracts away some work from AddInstance.
8083

8084
  @type lu: L{LogicalUnit}
8085
  @param lu: the logical unit on whose behalf we execute
8086
  @type instance: L{objects.Instance}
8087
  @param instance: the instance whose disks we should create
8088
  @type to_skip: list
8089
  @param to_skip: list of indices to skip
8090
  @type target_node: string
8091
  @param target_node: if passed, overrides the target node for creation
8092
  @rtype: boolean
8093
  @return: the success of the creation
8094

8095
  """
8096
  info = _GetInstanceInfoText(instance)
8097
  if target_node is None:
8098
    pnode = instance.primary_node
8099
    all_nodes = instance.all_nodes
8100
  else:
8101
    pnode = target_node
8102
    all_nodes = [pnode]
8103

    
8104
  if instance.disk_template in constants.DTS_FILEBASED:
8105
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8106
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8107

    
8108
    result.Raise("Failed to create directory '%s' on"
8109
                 " node %s" % (file_storage_dir, pnode))
8110

    
8111
  # Note: this needs to be kept in sync with adding of disks in
8112
  # LUInstanceSetParams
8113
  for idx, device in enumerate(instance.disks):
8114
    if to_skip and idx in to_skip:
8115
      continue
8116
    logging.info("Creating volume %s for instance %s",
8117
                 device.iv_name, instance.name)
8118
    #HARDCODE
8119
    for node in all_nodes:
8120
      f_create = node == pnode
8121
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8122

    
8123

    
8124
def _RemoveDisks(lu, instance, target_node=None):
8125
  """Remove all disks for an instance.
8126

8127
  This abstracts away some work from `AddInstance()` and
8128
  `RemoveInstance()`. Note that in case some of the devices couldn't
8129
  be removed, the removal will continue with the other ones (compare
8130
  with `_CreateDisks()`).
8131

8132
  @type lu: L{LogicalUnit}
8133
  @param lu: the logical unit on whose behalf we execute
8134
  @type instance: L{objects.Instance}
8135
  @param instance: the instance whose disks we should remove
8136
  @type target_node: string
8137
  @param target_node: used to override the node on which to remove the disks
8138
  @rtype: boolean
8139
  @return: the success of the removal
8140

8141
  """
8142
  logging.info("Removing block devices for instance %s", instance.name)
8143

    
8144
  all_result = True
8145
  for device in instance.disks:
8146
    if target_node:
8147
      edata = [(target_node, device)]
8148
    else:
8149
      edata = device.ComputeNodeTree(instance.primary_node)
8150
    for node, disk in edata:
8151
      lu.cfg.SetDiskID(disk, node)
8152
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8153
      if msg:
8154
        lu.LogWarning("Could not remove block device %s on node %s,"
8155
                      " continuing anyway: %s", device.iv_name, node, msg)
8156
        all_result = False
8157

    
8158
  if instance.disk_template == constants.DT_FILE:
8159
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8160
    if target_node:
8161
      tgt = target_node
8162
    else:
8163
      tgt = instance.primary_node
8164
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8165
    if result.fail_msg:
8166
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8167
                    file_storage_dir, tgt, result.fail_msg)
8168
      all_result = False
8169

    
8170
  return all_result
8171

    
8172

    
8173
def _ComputeDiskSizePerVG(disk_template, disks):
8174
  """Compute disk size requirements in the volume group
8175

8176
  """
8177
  def _compute(disks, payload):
8178
    """Universal algorithm.
8179

8180
    """
8181
    vgs = {}
8182
    for disk in disks:
8183
      vgs[disk[constants.IDISK_VG]] = \
8184
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload
8185

    
8186
    return vgs
8187

    
8188
  # Required free disk space as a function of disk and swap space
8189
  req_size_dict = {
8190
    constants.DT_DISKLESS: {},
8191
    constants.DT_PLAIN: _compute(disks, 0),
8192
    # 128 MB are added for drbd metadata for each disk
8193
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8194
    constants.DT_FILE: {},
8195
    constants.DT_SHARED_FILE: {},
8196
  }
8197

    
8198
  if disk_template not in req_size_dict:
8199
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8200
                                 " is unknown" % disk_template)
8201

    
8202
  return req_size_dict[disk_template]
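  # Worked example (hypothetical disks): for DT_DRBD8 with
  #   disks=[{"vg": "xenvg", "size": 10240}, {"vg": "xenvg", "size": 5120}]
  # _compute adds DRBD_META_SIZE to each disk, so the result is
  #   {"xenvg": 10240 + 128 + 5120 + 128} == {"xenvg": 15616}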
8203

    
8204

    
8205
def _ComputeDiskSize(disk_template, disks):
8206
  """Compute the total disk size requirement for a given disk template.
8207

8208
  """
8209
  # Required free disk space as a function of disk and swap space
8210
  req_size_dict = {
8211
    constants.DT_DISKLESS: None,
8212
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8213
    # 128 MB are added for drbd metadata for each disk
8214
    constants.DT_DRBD8:
8215
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8216
    constants.DT_FILE: None,
8217
    constants.DT_SHARED_FILE: 0,
8218
    constants.DT_BLOCK: 0,
8219
  }
8220

    
8221
  if disk_template not in req_size_dict:
8222
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8223
                                 " is unknown" % disk_template)
8224

    
8225
  return req_size_dict[disk_template]
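  # Worked example (hypothetical sizes): two DT_DRBD8 disks of 10240 MiB and
  # 5120 MiB need 10240 + 128 + 5120 + 128 = 15616 MiB in total, while the
  # same two disks as DT_PLAIN need just 15360 MiB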
8226

    
8227

    
8228
def _FilterVmNodes(lu, nodenames):
8229
  """Filters out non-vm_capable nodes from a list.
8230

8231
  @type lu: L{LogicalUnit}
8232
  @param lu: the logical unit for which we check
8233
  @type nodenames: list
8234
  @param nodenames: the list of nodes on which we should check
8235
  @rtype: list
8236
  @return: the list of vm-capable nodes
8237

8238
  """
8239
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8240
  return [name for name in nodenames if name not in non_vm_nodes]
8241

    
8242

    
8243
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8244
  """Hypervisor parameter validation.
8245

8246
  This function abstracts the hypervisor parameter validation to be
8247
  used in both instance create and instance modify.
8248

8249
  @type lu: L{LogicalUnit}
8250
  @param lu: the logical unit for which we check
8251
  @type nodenames: list
8252
  @param nodenames: the list of nodes on which we should check
8253
  @type hvname: string
8254
  @param hvname: the name of the hypervisor we should use
8255
  @type hvparams: dict
8256
  @param hvparams: the parameters which we need to check
8257
  @raise errors.OpPrereqError: if the parameters are not valid
8258

8259
  """
8260
  nodenames = _FilterVmNodes(lu, nodenames)
8261

    
8262
  cluster = lu.cfg.GetClusterInfo()
8263
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8264

    
8265
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8266
  for node in nodenames:
8267
    info = hvinfo[node]
8268
    if info.offline:
8269
      continue
8270
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8271

    
8272

    
8273
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8274
  """OS parameters validation.
8275

8276
  @type lu: L{LogicalUnit}
8277
  @param lu: the logical unit for which we check
8278
  @type required: boolean
8279
  @param required: whether the validation should fail if the OS is not
8280
      found
8281
  @type nodenames: list
8282
  @param nodenames: the list of nodes on which we should check
8283
  @type osname: string
8284
  @param osname: the name of the OS we should use
8285
  @type osparams: dict
8286
  @param osparams: the parameters which we need to check
8287
  @raise errors.OpPrereqError: if the parameters are not valid
8288

8289
  """
8290
  nodenames = _FilterVmNodes(lu, nodenames)
8291
  result = lu.rpc.call_os_validate(nodenames, required, osname,
8292
                                   [constants.OS_VALIDATE_PARAMETERS],
8293
                                   osparams)
8294
  for node, nres in result.items():
8295
    # we don't check for offline cases since this should be run only
8296
    # against the master node and/or an instance's nodes
8297
    nres.Raise("OS Parameters validation failed on node %s" % node)
8298
    if not nres.payload:
8299
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8300
                 osname, node)
8301

    
8302

    
8303
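# The two helpers above are used together when creating or modifying an
# instance (see LUInstanceCreate.CheckPrereq below): the per-instance
# overrides are merged over the cluster-wide defaults and the merged dicts
# are then validated on every vm_capable node involved, e.g.:
#
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
#   _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
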
class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks' parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

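  # Example disk specifications accepted above (illustrative; the volume name
  # is assumed): a freshly created disk is described as
  # {constants.IDISK_SIZE: 10240}, while adopting an existing volume uses
  # {constants.IDISK_ADOPT: "existing-lv-name"}; mixing adopted and
  # non-adopted disks in a single request is rejected.
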
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

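  # Note on the allocator result checked above: ial.result is a list of node
  # names; the first entry becomes the primary node and, when
  # ial.required_nodes == 2 (internally mirrored disk templates), the second
  # entry becomes the secondary node.
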
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

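  # Illustrative result (directories assumed): with a cluster file storage
  # dir of "/srv/ganeti/file-storage", an opcode file_storage_dir of "web"
  # and instance name "inst1.example.com", the method above yields
  # "/srv/ganeti/file-storage/web/inst1.example.com".
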
  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
    for param, value in self.op.beparams.iteritems():
      if value == constants.VALUE_AUTO:
        self.op.beparams[param] = default_beparams[param]
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      if link == constants.VALUE_AUTO:
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)

class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()

class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)

class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

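  # Valid argument combinations accepted by CheckArguments above:
  #   mode == constants.REPLACE_DISK_CHG: exactly one of remote_node or
  #     iallocator must be given (neither, or both, is an error);
  #   any other mode: neither remote_node nor iallocator may be given.
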
  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=list(relocate_from))

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and
    is now called from Exec because during node evacuation the iallocator was
    only called with an unmodified cluster model, not taking planned changes
    into account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

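  # Summary sketch: how _CheckPrereq2 above maps each replace mode onto the
  # runtime attributes, writing P for the primary node, S for the current
  # secondary and N for the new node chosen by hand or by the iallocator:
  #
  #   REPLACE_DISK_PRI:  target_node=P, other_node=S, new_node unset
  #   REPLACE_DISK_SEC:  target_node=S, other_node=P, new_node unset
  #   REPLACE_DISK_CHG:  target_node=S, other_node=P, new_node=N
  #   REPLACE_DISK_AUTO: target/other picked from whichever side reports
  #                      faulty disks; nothing to do if neither side does
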
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_nodes, self.node_secondary_ip.keys()))

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]))
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                             logical_id=(vg_meta, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

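  # Illustrative sketch: the mapping returned by _CreateNewStorage above for
  # a single replaced disk; the key is the DRBD device's iv_name and the LV
  # objects shown are hypothetical placeholders:
  #
  #   iv_names == {
  #     "disk/0": (drbd_dev,
  #                [old_data_lv, old_meta_lv],   # copies of drbd_dev.children
  #                [new_data_lv, new_meta_lv]),  # the freshly created LVs
  #     }
  #
  # The same triples are later consumed by _CheckDevices and
  # _RemoveOldStorage.
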
  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

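  # Illustrative sketch of the rename dance in step 4 of _ExecDrbd8DiskOnly
  # above, for a hypothetical data LV in volume group "xenvg" with
  # temp_suffix 1300000000:
  #
  #   old LV  ("xenvg", "aaaa.disk0_data")
  #       ->  ("xenvg", "aaaa.disk0_data_replaced-1300000000")
  #   new LV  ("xenvg", "bbbb.disk0_data")
  #       ->  ("xenvg", "aaaa.disk0_data")
  #
  # after which the new LV (now carrying the old name) is re-attached to the
  # DRBD device and the "_replaced-*" volume is removed by _RemoveOldStorage.
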
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.instance.primary_node,
                           self.target_node,
                           self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


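# Note: LURepairNodeStorage backs the storage repair operation that is
# normally reached through the "gnt-node repair-storage" command; the exact
# CLI syntax depends on the installed Ganeti version, so the following is
# only a hint (node name and volume group are hypothetical):
#
#   gnt-node repair-storage node1.example.com lvm-vg xenvg
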
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    self.lock_nodes = set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES

    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
      inst_fn = _GetNodeInstances

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups optimistically, needs verification once nodes have
      # been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    assert owned_nodes == self.lock_nodes

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

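  # Note on the locking pattern above: locks are declared optimistically in
  # DeclareLocks (the instance and node group sets are computed before their
  # locks are held) and then re-checked in CheckPrereq once the locks are
  # owned.  A minimal sketch of the idiom, with hypothetical helpers:
  #
  #   expected = compute_set()        # while not yet locked
  #   acquire_locks(expected)
  #   if compute_set() != expected:   # re-check under the locks
  #     raise errors.OpExecError("... changed since locks were acquired ...")
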
  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self.op.mode,
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)


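# Illustrative sketch: the shape of the value returned by LUNodeEvacuate.Exec
# above when a remote node is given and two instances (hypothetical names)
# are affected:
#
#   ResultWithJobs([
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst1", disks=[],
#                                     mode=constants.REPLACE_DISK_CHG, ...)],
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst2", ...)],
#   ])
#
# i.e. one single-opcode job per affected instance.
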
def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    lu.LogWarning("Unable to evacuate instances %s",
                  utils.CommaJoin("%s (%s)" % (name, reason)
                                  for (name, reason) in failed))

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]


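# Illustrative sketch: the alloc_result layout unpacked by _LoadNodeEvacResult
# above; instance, group and node names are hypothetical and the job entries
# stand for serialized opcode dictionaries:
#
#   moved  = [("inst1", "group1", ["node3"])]
#   failed = [("inst2", "not enough memory")]
#   jobs   = [[<serialized opcode>, ...], ...]   # one inner list per job
#
# Each serialized opcode is turned back into an object with
# opcodes.OpCode.LoadOpCode, and _SetOpEarlyRelease applies the early_release
# flag wherever the opcode supports it.
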
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")


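# Note: LUInstanceGrowDisk.Exec above issues the grow RPC twice per node:
# first with the dry-run flag set on every node, and only if all dry runs
# succeed, again for real.  A rough sketch of the pattern:
#
#   for dryrun in (True, False):
#     for node in instance.all_nodes:
#       self.cfg.SetDiskID(disk, node)
#       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount,
#                                            dryrun)
#       result.Raise("Grow request failed to node %s" % node)
#
# which keeps a mid-way failure from leaving the disk grown on only some of
# the nodes.
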
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

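  # Illustrative sketch: the per-device status assembled by the two helpers
  # above; the concrete values are hypothetical:
  #
  #   pstatus/sstatus == (dev_path, major, minor, sync_percent,
  #                       estimated_time, is_degraded, ldisk_status)
  #   e.g. ("/dev/drbd0", 147, 0, 99.8, 120, False, None)
  #
  # _ComputeDiskStatus wraps these into one dict per disk ("iv_name",
  # "logical_id", "pstatus", "sstatus", "children", ...), recursing into
  # dev.children for the underlying LVs.
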
  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"

      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


class LUInstanceSetParams(LogicalUnit):
10781
  """Modifies an instances's parameters.
10782

10783
  """
10784
  HPATH = "instance-modify"
10785
  HTYPE = constants.HTYPE_INSTANCE
10786
  REQ_BGL = False
10787

    
10788
  def CheckArguments(self):
10789
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10790
            self.op.hvparams or self.op.beparams or self.op.os_name):
10791
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10792

    
10793
    if self.op.hvparams:
10794
      _CheckGlobalHvParams(self.op.hvparams)
10795

    
10796
    # Disk validation
10797
    disk_addremove = 0
10798
    for disk_op, disk_dict in self.op.disks:
10799
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10800
      if disk_op == constants.DDM_REMOVE:
10801
        disk_addremove += 1
10802
        continue
10803
      elif disk_op == constants.DDM_ADD:
10804
        disk_addremove += 1
10805
      else:
10806
        if not isinstance(disk_op, int):
10807
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10808
        if not isinstance(disk_dict, dict):
10809
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10810
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get(constants.IDISK_SIZE, None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict[constants.IDISK_SIZE] = size
      else:
        # modification of disk
        if constants.IDISK_SIZE in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
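      # Illustrative note (not part of the original code): NIC changes use the
      # same (operation, parameters) pairs as disks, e.g. with hypothetical
      # values:
      #   (constants.DDM_ADD, {constants.INIC_IP: "192.0.2.10",
      #                        constants.INIC_MAC: constants.VALUE_AUTO})
      #   (1, {"bridge": "br0"})  # rewritten to constants.INIC_LINK later on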

      # nic_dict should be a dict
      nic_ip = nic_dict.get(constants.INIC_IP, None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict[constants.INIC_IP] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get("bridge", None)
      nic_link = nic_dict.get(constants.INIC_LINK, None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict["bridge"] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict[constants.INIC_LINK] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
        if nic_mac is None:
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args["memory"] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args["nics"] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if constants.INIC_IP in this_nic_override:
          ip = this_nic_override[constants.INIC_IP]
        else:
          ip = nic.ip
        if constants.INIC_MAC in this_nic_override:
          mac = this_nic_override[constants.INIC_MAC]
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args["nics"][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceDown(self, instance, "cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}
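    # Illustrative note (not part of the original code): hv_inst keeps only
    # the instance-level overrides, while hv_proposed/hv_new are those
    # overrides merged over the cluster defaults by SimpleFillHV.  With
    # hypothetical values, an override of {"kernel_path": "/boot/vmlinuz"}
    # on top of cluster defaults {"kernel_path": "", "root_path": "/dev/vda1"}
    # yields {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}.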

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)
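      # Illustrative note (not part of the original code): if the mask parses
      # into per-vCPU entries such as [[1, 2], [3], [0, 4]], the instance must
      # have BE_VCPUS == 3, and the highest CPU referenced (4) means every
      # node needs at least 4 + 1 = 5 physical CPUs for the check to pass.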

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we just changed auto_balance to yes or it was already set
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload["memory"])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload["memory_free"])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)
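        # Illustrative note (not part of the original code), with hypothetical
        # numbers: raising BE_MEMORY to 4096 MB while the instance currently
        # uses 1024 MB and the primary node reports 2048 MB free gives
        # miss_mem = 4096 - 1024 - 2048 = 1024 > 0, so the change is refused.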

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          if not isinstance(nres.payload.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failing over to its secondary"
                                       " node %s, due to not enough memory" %
                                       node, errors.ECODE_STATE)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if "bridge" in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
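      # Illustrative note (not part of the original code), with hypothetical
      # values: an update of {"bridge": "br0"} over old parameters
      # {constants.NIC_MODE: "bridged", constants.NIC_LINK: "xen-br0"}
      # results in a filled dict whose NIC_LINK is "br0", with any keys left
      # unset taken from the cluster defaults by SimpleFillNIC.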
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError("Cannot set the nic ip to None"
                                     " on a routed nic", errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError("Cannot set the nic mac to None",
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)

    return

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
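    # Illustrative note (not part of the original code): rename_list pairs
    # each existing plain disk with the logical_id its data LV will take as
    # child 0 of the new DRBD disk, e.g. (old_lv, ("xenvg", "new-data-name"))
    # with purely hypothetical volume names.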
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]
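    # Illustrative note (not part of the original code): for a DRBD8 disk the
    # children are [data LV, metadata LV], so keeping children[0] retains the
    # data volume; the metadata volumes are removed further down.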

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                        constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
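  # Illustrative note (not part of the original code): Exec() looks up the
  # conversion handler by (current, requested) template, e.g.
  #   self._DISK_CONVERSIONS[(constants.DT_PLAIN, constants.DT_DRBD8)]
  # resolves to _ConvertPlainToDrbd; unsupported pairs were already rejected
  # in CheckPrereq.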


class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload
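    # Illustrative note (not part of the original code): with hypothetical
    # names the returned mapping looks like
    #   {"node1.example.com": ["instance1.example.com"],
    #    "node2.example.com": False}  # False = node failed to answer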

    return result


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down first")

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info
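      # Illustrative note (not part of the original code): each entry of
      # dest_disk_info is a (host, port, magic) tuple describing where the
      # matching disk has to be sent, e.g. ("198.51.100.7", 11000, "magic123")
      # with purely hypothetical values.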
11863

    
11864
    else:
11865
      raise errors.ProgrammerError("Unhandled export mode %r" %
11866
                                   self.op.mode)
11867

    
11868
    # instance disk type verification
11869
    # TODO: Implement export support for file-based disks
11870
    for disk in self.instance.disks:
11871
      if disk.dev_type == constants.LD_FILE:
11872
        raise errors.OpPrereqError("Export not supported for instances with"
11873
                                   " file-based disks", errors.ECODE_INVAL)
11874

    
11875
  def _CleanupExports(self, feedback_fn):
11876
    """Removes exports of current instance from all other nodes.
11877

11878
    If an instance in a cluster with nodes A..D was exported to node C, its
11879
    exports will be removed from the nodes A, B and D.
11880

11881
    """
11882
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11883

    
11884
    nodelist = self.cfg.GetNodeList()
11885
    nodelist.remove(self.dst_node.name)
11886

    
11887
    # on one-node clusters nodelist will be empty after the removal
11888
    # if we proceed the backup would be removed because OpBackupQuery
11889
    # substitutes an empty list with the full cluster node list.
11890
    iname = self.instance.name
11891
    if nodelist:
11892
      feedback_fn("Removing old exports for instance %s" % iname)
11893
      exportlist = self.rpc.call_export_list(nodelist)
11894
      for node in exportlist:
11895
        if exportlist[node].fail_msg:
11896
          continue
11897
        if iname in exportlist[node].payload:
11898
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11899
          if msg:
11900
            self.LogWarning("Could not remove older export for instance %s"
11901
                            " on node %s: %s", iname, node, msg)
11902

    
11903
  def Exec(self, feedback_fn):
11904
    """Export an instance to an image in the cluster.
11905

11906
    """
11907
    assert self.op.mode in constants.EXPORT_MODES
11908

    
11909
    instance = self.instance
11910
    src_node = instance.primary_node
11911

    
11912
    if self.op.shutdown:
11913
      # shutdown the instance, but not the disks
11914
      feedback_fn("Shutting down instance %s" % instance.name)
11915
      result = self.rpc.call_instance_shutdown(src_node, instance,
11916
                                               self.op.shutdown_timeout)
11917
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11918
      result.Raise("Could not shutdown instance %s on"
11919
                   " node %s" % (instance.name, src_node))
11920

    
11921
    # set the disks ID correctly since call_instance_start needs the
11922
    # correct drbd minor to create the symlinks
11923
    for disk in instance.disks:
11924
      self.cfg.SetDiskID(disk, src_node)
11925

    
11926
    activate_disks = (not instance.admin_up)
11927

    
11928
    if activate_disks:
11929
      # Activate the instance disks if we'exporting a stopped instance
11930
      feedback_fn("Activating disks for %s" % instance.name)
11931
      _StartInstanceDisks(self, instance, None)
11932

    
11933
    try:
11934
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11935
                                                     instance)
11936

    
11937
      helper.CreateSnapshots()
11938
      try:
11939
        if (self.op.shutdown and instance.admin_up and
11940
            not self.op.remove_instance):
11941
          assert not activate_disks
11942
          feedback_fn("Starting instance %s" % instance.name)
11943
          result = self.rpc.call_instance_start(src_node,
11944
                                                (instance, None, None), False)
11945
          msg = result.fail_msg
11946
          if msg:
11947
            feedback_fn("Failed to start instance: %s" % msg)
11948
            _ShutdownInstanceDisks(self, instance)
11949
            raise errors.OpExecError("Could not start instance: %s" % msg)
11950

    
11951
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11952
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11953
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11954
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11955
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11956

    
11957
          (key_name, _, _) = self.x509_key_name
11958

    
11959
          dest_ca_pem = \
11960
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11961
                                            self.dest_x509_ca)
11962

    
11963
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11964
                                                     key_name, dest_ca_pem,
11965
                                                     timeouts)
11966
      finally:
11967
        helper.Cleanup()
11968

    
11969
      # Check for backwards compatibility
11970
      assert len(dresults) == len(instance.disks)
11971
      assert compat.all(isinstance(i, bool) for i in dresults), \
11972
             "Not all results are boolean: %r" % dresults
11973

    
11974
    finally:
11975
      if activate_disks:
11976
        feedback_fn("Deactivating disks for %s" % instance.name)
11977
        _ShutdownInstanceDisks(self, instance)
11978

    
11979
    if not (compat.all(dresults) and fin_resu):
11980
      failures = []
11981
      if not fin_resu:
11982
        failures.append("export finalization")
11983
      if not compat.all(dresults):
11984
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11985
                               if not dsk)
11986
        failures.append("disk export: disk(s) %s" % fdsk)
11987

    
11988
      raise errors.OpExecError("Export failed, errors in %s" %
11989
                               utils.CommaJoin(failures))
11990

    
11991
    # At this point, the export was successful, we can cleanup/finish
11992

    
11993
    # Remove instance if requested
11994
    if self.op.remove_instance:
11995
      feedback_fn("Removing instance %s" % instance.name)
11996
      _RemoveInstance(self, feedback_fn, instance,
11997
                      self.op.ignore_remove_failures)
11998

    
11999
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12000
      self._CleanupExports(feedback_fn)
12001

    
12002
    return fin_resu, dresults
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    # FIXME: Depends on side-effects of modifying the result of
    # C{cfg.GetAllNodesInfo}

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and end up split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
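
# Illustrative sketch (not part of the original module): what
# CheckAssignmentForSplitInstances above reports for hypothetical data.  For
# a DRBD instance currently on node1 (group "uuid-a") and node2 (group
# "uuid-b"), moving node1 into "uuid-b" heals the existing split, so the
# instance shows up in neither list:
#
#   changes = [("node1", "uuid-b")]
#   # (new_splits, previous_splits) == ([], [])
#
# Conversely, moving one node of an instance whose nodes currently share a
# single group puts that instance into new_splits.
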
class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)
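
# Illustrative sketch (not part of the original module): the shape of the
# mappings built by _GroupQuery._GetQueryData for hypothetical groups and
# nodes when both GQ_NODE and GQ_INST data are requested:
#
#   group_to_nodes = {"uuid-a": ["node1", "node2"], "uuid-b": ["node3"]}
#   group_to_instances = {"uuid-a": ["inst1"], "uuid-b": []}
#
# Instances are attributed to the group of their primary node only.
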
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the list of (path, tag) tuples matching the search pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
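
# Illustrative sketch (not part of the original module): the kind of result
# LUTagsSearch.Exec produces.  Assuming a hypothetical cluster tag "prod" and
# an instance "web1" tagged "production", searching for the pattern "prod"
# would return:
#
#   [("/cluster", "prod"), ("/instances/web1", "production")]
#
# because re.search matches the pattern anywhere inside each tag.
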
class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng
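
  # Illustrative sketch (not part of the original module): the "nodegroups"
  # mapping built above for a hypothetical single group, keyed by group UUID
  # (the alloc_policy value shown is just an assumed example):
  #
  #   {"d7f2...": {"name": "default", "alloc_policy": "preferred"}}
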
  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
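
  # Illustrative sketch (not part of the original module): the "memory_free"
  # adjustment above with hypothetical numbers.  For a primary instance
  # configured with BE_MEMORY = 1024 MiB that currently uses only 768 MiB,
  # the node's reported free memory is reduced by the difference so the
  # allocator treats the remaining 256 MiB as reserved:
  #
  #   i_mem_diff = 1024 - 768                             # = 256
  #   remote_info["memory_free"] -= max(0, i_mem_diff)    # 256 MiB less
  #
  # An instance using more than its configured memory does not increase
  # "memory_free", because of the max(0, ...) clamp.
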
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }
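
  # Illustrative sketch (not part of the original module): the request this
  # builds for change-group mode once _BuildInputData below adds the "type"
  # key (instance and group names are hypothetical):
  #
  #   {"type": constants.IALLOCATOR_MODE_CHG_GROUP,
  #    "instances": ["inst1.example.com"],
  #    "target_groups": ["uuid-of-target-group"]}
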
  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
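
  # Illustrative sketch (not part of the original module): a value of the
  # (moved, failed, jobs) shape described by _NEVAC_RESULT above, using
  # hypothetical names.  Each "moved" entry is a (string, string, list of
  # strings) triple, each "failed" entry is (string, optional string), and
  # the job list may only carry failover/migrate/replace-disks opcodes:
  #
  #   ([["inst1", "group2", ["node3", "node4"]]],
  #    [["inst2", "disk template not mirrored"]],
  #    [[{"OP_ID": opcodes.OpInstanceMigrate.OP_ID}]])  # plus opcode fields
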
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
     constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
     constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
13531

    
13532
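
  # Maps a list of node names to the sorted list of group names they belong
  # to; with hypothetical data where "node1" and "node2" both live in a
  # group named "default", the result would be ["default"].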
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
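
  # Exec() builds an IAllocator request for the selected mode; with
  # direction "in" it returns only the generated request text, with
  # direction "out" it also runs the named allocator and returns the raw,
  # unvalidated reply.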
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
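

# The assertion above ensures every query resource reachable via opcodes has
# an implementation registered here; _GetQueryImplementation() below is the
# lookup helper, raising OpPrereqError for unknown resource names.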
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
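
# Hypothetical lookups (purely illustrative):
#   _GetQueryImplementation(constants.QR_NODE)    # -> _NodeQuery
#   _GetQueryImplementation("no-such-resource")   # raises OpPrereqError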