root / lib / cmdlib.py @ 17b0b812


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62
from ganeti import rpc
63

    
64
import ganeti.masterd.instance # pylint: disable=W0611
65

    
66

    
67
#: Size of DRBD meta block device
68
DRBD_META_SIZE = 128
69

    
70

    
71
class ResultWithJobs:
72
  """Data container for LU results with jobs.
73

74
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
75
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
76
  contained in the C{jobs} attribute and include the job IDs in the opcode
77
  result.
78

79
  """
80
  def __init__(self, jobs, **kwargs):
81
    """Initializes this class.
82

83
    Additional return values can be specified as keyword arguments.
84

85
    @type jobs: list of lists of L{opcodes.OpCode}
86
    @param jobs: A list of lists of opcode objects
87

88
    """
89
    self.jobs = jobs
90
    self.other = kwargs
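
  # Editor's note: an illustrative sketch (not part of the original module)
  # of how an LU's Exec method would hand follow-up work back to the job
  # queue via this class; OpTestDelay is used purely as a stand-in opcode:
  #
  #   def Exec(self, feedback_fn):
  #     jobs = [
  #       [opcodes.OpTestDelay(duration=0)],  # first job, a single opcode
  #       [opcodes.OpTestDelay(duration=0)],  # second, independent job
  #       ]
  #     # Extra keyword arguments end up in the "other" attribute and are
  #     # merged into the opcode result by mcpu.Processor._ProcessResult.
  #     return ResultWithJobs(jobs, comment="follow-up jobs submitted")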
91

    
92

    
93
class LogicalUnit(object):
94
  """Logical Unit base class.
95

96
  Subclasses must follow these rules:
97
    - implement ExpandNames
98
    - implement CheckPrereq (except when tasklets are used)
99
    - implement Exec (except when tasklets are used)
100
    - implement BuildHooksEnv
101
    - implement BuildHooksNodes
102
    - redefine HPATH and HTYPE
103
    - optionally redefine their run requirements:
104
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
105

106
  Note that all commands require root permissions.
107

108
  @ivar dry_run_result: the value (if any) that will be returned to the caller
109
      in dry-run mode (signalled by opcode dry_run parameter)
110

111
  """
112
  HPATH = None
113
  HTYPE = None
114
  REQ_BGL = True
115

    
116
  def __init__(self, processor, op, context, rpc_runner):
117
    """Constructor for LogicalUnit.
118

119
    This needs to be overridden in derived classes in order to check op
120
    validity.
121

122
    """
123
    self.proc = processor
124
    self.op = op
125
    self.cfg = context.cfg
126
    self.glm = context.glm
127
    # readability alias
128
    self.owned_locks = context.glm.list_owned
129
    self.context = context
130
    self.rpc = rpc_runner
131
    # Dicts used to declare locking needs to mcpu
132
    self.needed_locks = None
133
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
134
    self.add_locks = {}
135
    self.remove_locks = {}
136
    # Used to force good behavior when calling helper functions
137
    self.recalculate_locks = {}
138
    # logging
139
    self.Log = processor.Log # pylint: disable=C0103
140
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
141
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
142
    self.LogStep = processor.LogStep # pylint: disable=C0103
143
    # support for dry-run
144
    self.dry_run_result = None
145
    # support for generic debug attribute
146
    if (not hasattr(self.op, "debug_level") or
147
        not isinstance(self.op.debug_level, int)):
148
      self.op.debug_level = 0
149

    
150
    # Tasklets
151
    self.tasklets = None
152

    
153
    # Validate opcode parameters and set defaults
154
    self.op.Validate(True)
155

    
156
    self.CheckArguments()
157

    
158
  def CheckArguments(self):
159
    """Check syntactic validity for the opcode arguments.
160

161
    This method is for doing a simple syntactic check and ensuring the
162
    validity of opcode parameters, without any cluster-related
163
    checks. While the same can be accomplished in ExpandNames and/or
164
    CheckPrereq, doing these separately is better because:
165

166
      - ExpandNames is left as purely a lock-related function
167
      - CheckPrereq is run after we have acquired locks (and possibly
168
        waited for them)
169

170
    The function is allowed to change the self.op attribute so that
171
    later methods need no longer worry about missing parameters.
172

173
    """
174
    pass
175

    
176
  def ExpandNames(self):
177
    """Expand names for this LU.
178

179
    This method is called before starting to execute the opcode, and it should
180
    update all the parameters of the opcode to their canonical form (e.g. a
181
    short node name must be fully expanded after this method has successfully
182
    completed). This way locking, hooks, logging, etc. can work correctly.
183

184
    LUs which implement this method must also populate the self.needed_locks
185
    member, as a dict with lock levels as keys, and a list of needed lock names
186
    as values. Rules:
187

188
      - use an empty dict if you don't need any lock
189
      - if you don't need any lock at a particular level omit that level
190
      - don't put anything for the BGL level
191
      - if you want all locks at a level use locking.ALL_SET as a value
192

193
    If you need to share locks (rather than acquire them exclusively) at one
194
    level you can modify self.share_locks, setting a true value (usually 1) for
195
    that level. By default locks are not shared.
196

197
    This function can also define a list of tasklets, which then will be
198
    executed in order instead of the usual LU-level CheckPrereq and Exec
199
    functions, if those are not defined by the LU.
200

201
    Examples::
202

203
      # Acquire all nodes and one instance
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: locking.ALL_SET,
206
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
207
      }
208
      # Acquire just two nodes
209
      self.needed_locks = {
210
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
211
      }
212
      # Acquire no locks
213
      self.needed_locks = {} # No, you can't leave it to the default value None
214

215
    """
216
    # The implementation of this method is mandatory only if the new LU is
217
    # concurrent, so that old LUs don't need to be changed all at the same
218
    # time.
219
    if self.REQ_BGL:
220
      self.needed_locks = {} # Exclusive LUs don't need locks.
221
    else:
222
      raise NotImplementedError
223

    
224
  def DeclareLocks(self, level):
225
    """Declare LU locking needs for a level
226

227
    While most LUs can just declare their locking needs at ExpandNames time,
228
    sometimes there's the need to calculate some locks after having acquired
229
    the ones before. This function is called just before acquiring locks at a
230
    particular level, but after acquiring the ones at lower levels, and permits
231
    such calculations. It can be used to modify self.needed_locks, and by
232
    default it does nothing.
233

234
    This function is only called if you have something already set in
235
    self.needed_locks for the level.
236

237
    @param level: Locking level which is going to be locked
238
    @type level: member of ganeti.locking.LEVELS
239

240
    """
241

    
242
  def CheckPrereq(self):
243
    """Check prerequisites for this LU.
244

245
    This method should check that the prerequisites for the execution
246
    of this LU are fulfilled. It can do internode communication, but
247
    it should be idempotent - no cluster or system changes are
248
    allowed.
249

250
    The method should raise errors.OpPrereqError in case something is
251
    not fulfilled. Its return value is ignored.
252

253
    This method should also update all the parameters of the opcode to
254
    their canonical form if it hasn't been done by ExpandNames before.
255

256
    """
257
    if self.tasklets is not None:
258
      for (idx, tl) in enumerate(self.tasklets):
259
        logging.debug("Checking prerequisites for tasklet %s/%s",
260
                      idx + 1, len(self.tasklets))
261
        tl.CheckPrereq()
262
    else:
263
      pass
264

    
265
  def Exec(self, feedback_fn):
266
    """Execute the LU.
267

268
    This method should implement the actual work. It should raise
269
    errors.OpExecError for failures that are somewhat dealt with in
270
    code, or expected.
271

272
    """
273
    if self.tasklets is not None:
274
      for (idx, tl) in enumerate(self.tasklets):
275
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
276
        tl.Exec(feedback_fn)
277
    else:
278
      raise NotImplementedError
279

    
280
  def BuildHooksEnv(self):
281
    """Build hooks environment for this LU.
282

283
    @rtype: dict
284
    @return: Dictionary containing the environment that will be used for
285
      running the hooks for this LU. The keys of the dict must not be prefixed
286
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
287
      will extend the environment with additional variables. If no environment
288
      should be defined, an empty dictionary should be returned (not C{None}).
289
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
290
      will not be called.
291

292
    """
293
    raise NotImplementedError
294

    
295
  def BuildHooksNodes(self):
296
    """Build list of nodes to run LU's hooks.
297

298
    @rtype: tuple; (list, list)
299
    @return: Tuple containing a list of node names on which the hook
300
      should run before the execution and a list of node names on which the
301
      hook should run after the execution. If there are no nodes, return an
302
      empty list (and not None).
303
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
304
      will not be called.
305

306
    """
307
    raise NotImplementedError
308

    
309
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
310
    """Notify the LU about the results of its hooks.
311

312
    This method is called every time a hooks phase is executed, and notifies
313
    the Logical Unit about the hooks' result. The LU can then use it to alter
314
    its result based on the hooks.  By default the method does nothing and the
315
    previous result is passed back unchanged, but any LU can override it if it
316
    wants to use the local cluster hook-scripts somehow.
317

318
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
319
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
320
    @param hook_results: the results of the multi-node hooks rpc call
321
    @param feedback_fn: function used to send feedback back to the caller
322
    @param lu_result: the previous Exec result this LU had, or None
323
        in the PRE phase
324
    @return: the new Exec result, based on the previous result
325
        and hook results
326

327
    """
328
    # The API must be kept, thus we ignore the "unused argument" and "could
329
    # be a function" pylint warnings
330
    # pylint: disable=W0613,R0201
331
    return lu_result
332

    
333
  def _ExpandAndLockInstance(self):
334
    """Helper function to expand and lock an instance.
335

336
    Many LUs that work on an instance take its name in self.op.instance_name
337
    and need to expand it and then declare the expanded name for locking. This
338
    function does it, and then updates self.op.instance_name to the expanded
339
    name. It also initializes needed_locks as a dict, if this hasn't been done
340
    before.
341

342
    """
343
    if self.needed_locks is None:
344
      self.needed_locks = {}
345
    else:
346
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
347
        "_ExpandAndLockInstance called with instance-level locks set"
348
    self.op.instance_name = _ExpandInstanceName(self.cfg,
349
                                                self.op.instance_name)
350
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
351

    
352
  def _LockInstancesNodes(self, primary_only=False,
353
                          level=locking.LEVEL_NODE):
354
    """Helper function to declare instances' nodes for locking.
355

356
    This function should be called after locking one or more instances to lock
357
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
358
    with all primary or secondary nodes for instances already locked and
359
    present in self.needed_locks[locking.LEVEL_INSTANCE].
360

361
    It should be called from DeclareLocks, and for safety only works if
362
    self.recalculate_locks[locking.LEVEL_NODE] is set.
363

364
    In the future it may grow parameters to just lock some instance's nodes, or
365
    to just lock primary or secondary nodes, if needed.
366

367
    It should be called in DeclareLocks in a way similar to::
368

369
      if level == locking.LEVEL_NODE:
370
        self._LockInstancesNodes()
371

372
    @type primary_only: boolean
373
    @param primary_only: only lock primary nodes of locked instances
374
    @param level: Which lock level to use for locking nodes
375

376
    """
377
    assert level in self.recalculate_locks, \
378
      "_LockInstancesNodes helper function called with no nodes to recalculate"
379

    
380
    # TODO: check if we've really been called with the instance locks held
381

    
382
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
383
    # future we might want to have different behaviors depending on the value
384
    # of self.recalculate_locks[locking.LEVEL_NODE]
385
    wanted_nodes = []
386
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
387
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
388
      wanted_nodes.append(instance.primary_node)
389
      if not primary_only:
390
        wanted_nodes.extend(instance.secondary_nodes)
391

    
392
    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
393
      self.needed_locks[level] = wanted_nodes
394
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
395
      self.needed_locks[level].extend(wanted_nodes)
396
    else:
397
      raise errors.ProgrammerError("Unknown recalculation mode")
398

    
399
    del self.recalculate_locks[level]
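
# Editor's sketch (not part of the original module): the overall shape of a
# minimal concurrent LU following the conventions documented above.  The class
# name and opcode fields are hypothetical; hooks methods are omitted:
#
#   class LUExampleInstanceNoop(LogicalUnit):
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#     def DeclareLocks(self, level):
#       if level == locking.LEVEL_NODE:
#         self._LockInstancesNodes()
#
#     def CheckPrereq(self):
#       self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("%s runs on %s" %
#                   (self.instance.name, self.instance.primary_node))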
400

    
401

    
402
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
403
  """Simple LU which runs no hooks.
404

405
  This LU is intended as a parent for other LogicalUnits which will
406
  run no hooks, in order to reduce duplicate code.
407

408
  """
409
  HPATH = None
410
  HTYPE = None
411

    
412
  def BuildHooksEnv(self):
413
    """Empty BuildHooksEnv for NoHooksLu.
414

415
    This just raises an error.
416

417
    """
418
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
419

    
420
  def BuildHooksNodes(self):
421
    """Empty BuildHooksNodes for NoHooksLU.
422

423
    """
424
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
425

    
426

    
427
class Tasklet:
428
  """Tasklet base class.
429

430
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
431
  they can mix legacy code with tasklets. Locking needs to be done in the LU;
432
  tasklets know nothing about locks.
433

434
  Subclasses must follow these rules:
435
    - Implement CheckPrereq
436
    - Implement Exec
437

438
  """
439
  def __init__(self, lu):
440
    self.lu = lu
441

    
442
    # Shortcuts
443
    self.cfg = lu.cfg
444
    self.rpc = lu.rpc
445

    
446
  def CheckPrereq(self):
447
    """Check prerequisites for this tasklet.
448

449
    This method should check whether the prerequisites for the execution of
450
    this tasklet are fulfilled. It can do internode communication, but it
451
    should be idempotent - no cluster or system changes are allowed.
452

453
    The method should raise errors.OpPrereqError in case something is not
454
    fulfilled. Its return value is ignored.
455

456
    This method should also update all parameters to their canonical form if it
457
    hasn't been done before.
458

459
    """
460
    pass
461

    
462
  def Exec(self, feedback_fn):
463
    """Execute the tasklet.
464

465
    This method should implement the actual work. It should raise
466
    errors.OpExecError for failures that are somewhat dealt with in code, or
467
    expected.
468

469
    """
470
    raise NotImplementedError
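
# Editor's note: an illustrative sketch (not part of the original module) of
# how an LU delegates its work to tasklets.  Once self.tasklets is assigned,
# the LogicalUnit.CheckPrereq and LogicalUnit.Exec defaults above iterate over
# it; "_ExampleTasklet" and "instance_names" are hypothetical names:
#
#   def ExpandNames(self):
#     ...
#     self.tasklets = [_ExampleTasklet(self, name)
#                      for name in self.op.instance_names]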
471

    
472

    
473
class _QueryBase:
474
  """Base for query utility classes.
475

476
  """
477
  #: Attribute holding field definitions
478
  FIELDS = None
479

    
480
  def __init__(self, qfilter, fields, use_locking):
481
    """Initializes this class.
482

483
    """
484
    self.use_locking = use_locking
485

    
486
    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
487
                             namefield="name")
488
    self.requested_data = self.query.RequestedData()
489
    self.names = self.query.RequestedNames()
490

    
491
    # Sort only if no names were requested
492
    self.sort_by_name = not self.names
493

    
494
    self.do_locking = None
495
    self.wanted = None
496

    
497
  def _GetNames(self, lu, all_names, lock_level):
498
    """Helper function to determine names asked for in the query.
499

500
    """
501
    if self.do_locking:
502
      names = lu.owned_locks(lock_level)
503
    else:
504
      names = all_names
505

    
506
    if self.wanted == locking.ALL_SET:
507
      assert not self.names
508
      # caller didn't specify names, so ordering is not important
509
      return utils.NiceSort(names)
510

    
511
    # caller specified names and we must keep the same order
512
    assert self.names
513
    assert not self.do_locking or lu.glm.is_owned(lock_level)
514

    
515
    missing = set(self.wanted).difference(names)
516
    if missing:
517
      raise errors.OpExecError("Some items were removed before retrieving"
518
                               " their data: %s" % missing)
519

    
520
    # Return expanded names
521
    return self.wanted
522

    
523
  def ExpandNames(self, lu):
524
    """Expand names for this query.
525

526
    See L{LogicalUnit.ExpandNames}.
527

528
    """
529
    raise NotImplementedError()
530

    
531
  def DeclareLocks(self, lu, level):
532
    """Declare locks for this query.
533

534
    See L{LogicalUnit.DeclareLocks}.
535

536
    """
537
    raise NotImplementedError()
538

    
539
  def _GetQueryData(self, lu):
540
    """Collects all data for this query.
541

542
    @return: Query data object
543

544
    """
545
    raise NotImplementedError()
546

    
547
  def NewStyleQuery(self, lu):
548
    """Collect data and execute query.
549

550
    """
551
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
552
                                  sort_by_name=self.sort_by_name)
553

    
554
  def OldStyleQuery(self, lu):
555
    """Collect data and execute query.
556

557
    """
558
    return self.query.OldStyleQuery(self._GetQueryData(lu),
559
                                    sort_by_name=self.sort_by_name)
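
# Editor's sketch (not part of the original module): concrete subclasses set
# FIELDS to a field-definition mapping from the query module and implement the
# three abstract methods; the names used below are hypothetical placeholders:
#
#   class _ExampleQuery(_QueryBase):
#     FIELDS = query.EXAMPLE_FIELDS          # hypothetical field definitions
#
#     def ExpandNames(self, lu):
#       lu.needed_locks = {}                 # this example needs no locks
#
#     def DeclareLocks(self, lu, level):
#       pass
#
#     def _GetQueryData(self, lu):
#       return {}                            # would collect per-item data here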
560

    
561

    
562
def _ShareAll():
563
  """Returns a dict declaring all lock levels shared.
564

565
  """
566
  return dict.fromkeys(locking.LEVELS, 1)
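
# Editor's note (illustrative, not in the original): the typical caller is an
# LU's ExpandNames, e.g.:
#
#   self.share_locks = _ShareAll()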
567

    
568

    
569
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
570
  """Checks if the owned node groups are still correct for an instance.
571

572
  @type cfg: L{config.ConfigWriter}
573
  @param cfg: The cluster configuration
574
  @type instance_name: string
575
  @param instance_name: Instance name
576
  @type owned_groups: set or frozenset
577
  @param owned_groups: List of currently owned node groups
578

579
  """
580
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
581

    
582
  if not owned_groups.issuperset(inst_groups):
583
    raise errors.OpPrereqError("Instance %s's node groups changed since"
584
                               " locks were acquired, current groups are"
585
                               " '%s', owning groups '%s'; retry the"
586
                               " operation" %
587
                               (instance_name,
588
                                utils.CommaJoin(inst_groups),
589
                                utils.CommaJoin(owned_groups)),
590
                               errors.ECODE_STATE)
591

    
592
  return inst_groups
593

    
594

    
595
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
596
  """Checks if the instances in a node group are still correct.
597

598
  @type cfg: L{config.ConfigWriter}
599
  @param cfg: The cluster configuration
600
  @type group_uuid: string
601
  @param group_uuid: Node group UUID
602
  @type owned_instances: set or frozenset
603
  @param owned_instances: List of currently owned instances
604

605
  """
606
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
607
  if owned_instances != wanted_instances:
608
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
609
                               " locks were acquired, wanted '%s', have '%s';"
610
                               " retry the operation" %
611
                               (group_uuid,
612
                                utils.CommaJoin(wanted_instances),
613
                                utils.CommaJoin(owned_instances)),
614
                               errors.ECODE_STATE)
615

    
616
  return wanted_instances
617

    
618

    
619
def _SupportsOob(cfg, node):
620
  """Tells if node supports OOB.
621

622
  @type cfg: L{config.ConfigWriter}
623
  @param cfg: The cluster configuration
624
  @type node: L{objects.Node}
625
  @param node: The node
626
  @return: The OOB script if supported or an empty string otherwise
627

628
  """
629
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
630

    
631

    
632
def _GetWantedNodes(lu, nodes):
633
  """Returns list of checked and expanded node names.
634

635
  @type lu: L{LogicalUnit}
636
  @param lu: the logical unit on whose behalf we execute
637
  @type nodes: list
638
  @param nodes: list of node names or None for all nodes
639
  @rtype: list
640
  @return: the list of nodes, sorted
641
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
642

643
  """
644
  if nodes:
645
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
646

    
647
  return utils.NiceSort(lu.cfg.GetNodeList())
648

    
649

    
650
def _GetWantedInstances(lu, instances):
651
  """Returns list of checked and expanded instance names.
652

653
  @type lu: L{LogicalUnit}
654
  @param lu: the logical unit on whose behalf we execute
655
  @type instances: list
656
  @param instances: list of instance names or None for all instances
657
  @rtype: list
658
  @return: the list of instances, sorted
659
  @raise errors.OpPrereqError: if the instances parameter is wrong type
660
  @raise errors.OpPrereqError: if any of the passed instances is not found
661

662
  """
663
  if instances:
664
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
665
  else:
666
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
667
  return wanted
668

    
669

    
670
def _GetUpdatedParams(old_params, update_dict,
671
                      use_default=True, use_none=False):
672
  """Return the new version of a parameter dictionary.
673

674
  @type old_params: dict
675
  @param old_params: old parameters
676
  @type update_dict: dict
677
  @param update_dict: dict containing new parameter values, or
678
      constants.VALUE_DEFAULT to reset the parameter to its default
679
      value
680
  @type use_default: boolean
681
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
682
      values as 'to be deleted' values
683
  @type use_none: boolean
684
  @param use_none: whether to recognise C{None} values as 'to be
685
      deleted' values
686
  @rtype: dict
687
  @return: the new parameter dictionary
688

689
  """
690
  params_copy = copy.deepcopy(old_params)
691
  for key, val in update_dict.iteritems():
692
    if ((use_default and val == constants.VALUE_DEFAULT) or
693
        (use_none and val is None)):
694
      try:
695
        del params_copy[key]
696
      except KeyError:
697
        pass
698
    else:
699
      params_copy[key] = val
700
  return params_copy
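
# Editor's note: a worked example (not part of the original module) of the
# update semantics implemented above, using made-up parameter names:
#
#   _GetUpdatedParams({"mem": 128, "vcpus": 1},
#                     {"mem": constants.VALUE_DEFAULT, "vcpus": 2, "disk": 3})
#   => {"vcpus": 2, "disk": 3}   # "mem" is deleted, i.e. reset to its default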
701

    
702

    
703
def _ReleaseLocks(lu, level, names=None, keep=None):
704
  """Releases locks owned by an LU.
705

706
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
707
  @param level: Lock level
708
  @type names: list or None
709
  @param names: Names of locks to release
710
  @type keep: list or None
711
  @param keep: Names of locks to retain
712

713
  """
714
  assert not (keep is not None and names is not None), \
715
         "Only one of the 'names' and the 'keep' parameters can be given"
716

    
717
  if names is not None:
718
    should_release = names.__contains__
719
  elif keep:
720
    should_release = lambda name: name not in keep
721
  else:
722
    should_release = None
723

    
724
  if should_release:
725
    retain = []
726
    release = []
727

    
728
    # Determine which locks to release
729
    for name in lu.owned_locks(level):
730
      if should_release(name):
731
        release.append(name)
732
      else:
733
        retain.append(name)
734

    
735
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
736

    
737
    # Release just some locks
738
    lu.glm.release(level, names=release)
739

    
740
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
741
  else:
742
    # Release everything
743
    lu.glm.release(level)
744

    
745
    assert not lu.glm.is_owned(level), "No locks should be owned"
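
# Editor's note (illustrative, not in the original): a typical call releases
# everything except a retained subset, for example keeping only the locks on
# an instance's own nodes:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.instance.all_nodes)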
746

    
747

    
748
def _MapInstanceDisksToNodes(instances):
749
  """Creates a map from (node, volume) to instance name.
750

751
  @type instances: list of L{objects.Instance}
752
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
753

754
  """
755
  return dict(((node, vol), inst.name)
756
              for inst in instances
757
              for (node, vols) in inst.MapLVsByNode().items()
758
              for vol in vols)
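
# Editor's note: the resulting mapping has the following shape (node, volume
# and instance names are hypothetical, not from the original module):
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}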
759

    
760

    
761
def _RunPostHook(lu, node_name):
762
  """Runs the post-hook for an opcode on a single node.
763

764
  """
765
  hm = lu.proc.BuildHooksManager(lu)
766
  try:
767
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
768
  except:
769
    # pylint: disable=W0702
770
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
771

    
772

    
773
def _CheckOutputFields(static, dynamic, selected):
774
  """Checks whether all selected fields are valid.
775

776
  @type static: L{utils.FieldSet}
777
  @param static: static fields set
778
  @type dynamic: L{utils.FieldSet}
779
  @param dynamic: dynamic fields set
780

781
  """
782
  f = utils.FieldSet()
783
  f.Extend(static)
784
  f.Extend(dynamic)
785

    
786
  delta = f.NonMatching(selected)
787
  if delta:
788
    raise errors.OpPrereqError("Unknown output fields selected: %s"
789
                               % ",".join(delta), errors.ECODE_INVAL)
790

    
791

    
792
def _CheckGlobalHvParams(params):
793
  """Validates that given hypervisor params are not global ones.
794

795
  This will ensure that instances don't get customised versions of
796
  global params.
797

798
  """
799
  used_globals = constants.HVC_GLOBALS.intersection(params)
800
  if used_globals:
801
    msg = ("The following hypervisor parameters are global and cannot"
802
           " be customized at instance level, please modify them at"
803
           " cluster level: %s" % utils.CommaJoin(used_globals))
804
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
805

    
806

    
807
def _CheckNodeOnline(lu, node, msg=None):
808
  """Ensure that a given node is online.
809

810
  @param lu: the LU on behalf of which we make the check
811
  @param node: the node to check
812
  @param msg: if passed, should be a message to replace the default one
813
  @raise errors.OpPrereqError: if the node is offline
814

815
  """
816
  if msg is None:
817
    msg = "Can't use offline node"
818
  if lu.cfg.GetNodeInfo(node).offline:
819
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
820

    
821

    
822
def _CheckNodeNotDrained(lu, node):
823
  """Ensure that a given node is not drained.
824

825
  @param lu: the LU on behalf of which we make the check
826
  @param node: the node to check
827
  @raise errors.OpPrereqError: if the node is drained
828

829
  """
830
  if lu.cfg.GetNodeInfo(node).drained:
831
    raise errors.OpPrereqError("Can't use drained node %s" % node,
832
                               errors.ECODE_STATE)
833

    
834

    
835
def _CheckNodeVmCapable(lu, node):
836
  """Ensure that a given node is vm capable.
837

838
  @param lu: the LU on behalf of which we make the check
839
  @param node: the node to check
840
  @raise errors.OpPrereqError: if the node is not vm capable
841

842
  """
843
  if not lu.cfg.GetNodeInfo(node).vm_capable:
844
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
845
                               errors.ECODE_STATE)
846

    
847

    
848
def _CheckNodeHasOS(lu, node, os_name, force_variant):
849
  """Ensure that a node supports a given OS.
850

851
  @param lu: the LU on behalf of which we make the check
852
  @param node: the node to check
853
  @param os_name: the OS to query about
854
  @param force_variant: whether to ignore variant errors
855
  @raise errors.OpPrereqError: if the node does not support the OS
856

857
  """
858
  result = lu.rpc.call_os_get(node, os_name)
859
  result.Raise("OS '%s' not in supported OS list for node %s" %
860
               (os_name, node),
861
               prereq=True, ecode=errors.ECODE_INVAL)
862
  if not force_variant:
863
    _CheckOSVariant(result.payload, os_name)
864

    
865

    
866
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
867
  """Ensure that a node has the given secondary ip.
868

869
  @type lu: L{LogicalUnit}
870
  @param lu: the LU on behalf of which we make the check
871
  @type node: string
872
  @param node: the node to check
873
  @type secondary_ip: string
874
  @param secondary_ip: the ip to check
875
  @type prereq: boolean
876
  @param prereq: whether to throw a prerequisite or an execute error
877
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
878
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
879

880
  """
881
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
882
  result.Raise("Failure checking secondary ip on node %s" % node,
883
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
884
  if not result.payload:
885
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
886
           " please fix and re-run this command" % secondary_ip)
887
    if prereq:
888
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
889
    else:
890
      raise errors.OpExecError(msg)
891

    
892

    
893
def _GetClusterDomainSecret():
894
  """Reads the cluster domain secret.
895

896
  """
897
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
898
                               strict=True)
899

    
900

    
901
def _CheckInstanceDown(lu, instance, reason):
902
  """Ensure that an instance is not running."""
903
  if instance.admin_up:
904
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
905
                               (instance.name, reason), errors.ECODE_STATE)
906

    
907
  pnode = instance.primary_node
908
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
909
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
910
              prereq=True, ecode=errors.ECODE_ENVIRON)
911

    
912
  if instance.name in ins_l.payload:
913
    raise errors.OpPrereqError("Instance %s is running, %s" %
914
                               (instance.name, reason), errors.ECODE_STATE)
915

    
916

    
917
def _ExpandItemName(fn, name, kind):
918
  """Expand an item name.
919

920
  @param fn: the function to use for expansion
921
  @param name: requested item name
922
  @param kind: text description ('Node' or 'Instance')
923
  @return: the resolved (full) name
924
  @raise errors.OpPrereqError: if the item is not found
925

926
  """
927
  full_name = fn(name)
928
  if full_name is None:
929
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
930
                               errors.ECODE_NOENT)
931
  return full_name
932

    
933

    
934
def _ExpandNodeName(cfg, name):
935
  """Wrapper over L{_ExpandItemName} for nodes."""
936
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
937

    
938

    
939
def _ExpandInstanceName(cfg, name):
940
  """Wrapper over L{_ExpandItemName} for instance."""
941
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
942

    
943

    
944
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
945
                          memory, vcpus, nics, disk_template, disks,
946
                          bep, hvp, hypervisor_name, tags):
947
  """Builds instance related env variables for hooks
948

949
  This builds the hook environment from individual variables.
950

951
  @type name: string
952
  @param name: the name of the instance
953
  @type primary_node: string
954
  @param primary_node: the name of the instance's primary node
955
  @type secondary_nodes: list
956
  @param secondary_nodes: list of secondary nodes as strings
957
  @type os_type: string
958
  @param os_type: the name of the instance's OS
959
  @type status: boolean
960
  @param status: the should_run status of the instance
961
  @type memory: string
962
  @param memory: the memory size of the instance
963
  @type vcpus: string
964
  @param vcpus: the count of VCPUs the instance has
965
  @type nics: list
966
  @param nics: list of tuples (ip, mac, mode, link) representing
967
      the NICs the instance has
968
  @type disk_template: string
969
  @param disk_template: the disk template of the instance
970
  @type disks: list
971
  @param disks: the list of (size, mode) pairs
972
  @type bep: dict
973
  @param bep: the backend parameters for the instance
974
  @type hvp: dict
975
  @param hvp: the hypervisor parameters for the instance
976
  @type hypervisor_name: string
977
  @param hypervisor_name: the hypervisor for the instance
978
  @type tags: list
979
  @param tags: list of instance tags as strings
980
  @rtype: dict
981
  @return: the hook environment for this instance
982

983
  """
984
  if status:
985
    str_status = "up"
986
  else:
987
    str_status = "down"
988
  env = {
989
    "OP_TARGET": name,
990
    "INSTANCE_NAME": name,
991
    "INSTANCE_PRIMARY": primary_node,
992
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
993
    "INSTANCE_OS_TYPE": os_type,
994
    "INSTANCE_STATUS": str_status,
995
    "INSTANCE_MEMORY": memory,
996
    "INSTANCE_VCPUS": vcpus,
997
    "INSTANCE_DISK_TEMPLATE": disk_template,
998
    "INSTANCE_HYPERVISOR": hypervisor_name,
999
  }
1000

    
1001
  if nics:
1002
    nic_count = len(nics)
1003
    for idx, (ip, mac, mode, link) in enumerate(nics):
1004
      if ip is None:
1005
        ip = ""
1006
      env["INSTANCE_NIC%d_IP" % idx] = ip
1007
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1008
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1009
      env["INSTANCE_NIC%d_LINK" % idx] = link
1010
      if mode == constants.NIC_MODE_BRIDGED:
1011
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1012
  else:
1013
    nic_count = 0
1014

    
1015
  env["INSTANCE_NIC_COUNT"] = nic_count
1016

    
1017
  if disks:
1018
    disk_count = len(disks)
1019
    for idx, (size, mode) in enumerate(disks):
1020
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1021
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1022
  else:
1023
    disk_count = 0
1024

    
1025
  env["INSTANCE_DISK_COUNT"] = disk_count
1026

    
1027
  if not tags:
1028
    tags = []
1029

    
1030
  env["INSTANCE_TAGS"] = " ".join(tags)
1031

    
1032
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1033
    for key, value in source.items():
1034
      env["INSTANCE_%s_%s" % (kind, key)] = value
1035

    
1036
  return env
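
# Editor's note: an abridged, hypothetical example (not from the original
# module) of the environment built above for an instance with one bridged NIC
# and one disk; the hooks runner later prefixes every key with "GANETI_":
#
#   {
#     "OP_TARGET": "inst1.example.com",
#     "INSTANCE_NAME": "inst1.example.com",
#     "INSTANCE_PRIMARY": "node1.example.com",
#     "INSTANCE_STATUS": "up",
#     "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_NIC0_MODE": "bridged",
#     "INSTANCE_DISK_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 1024,
#     ...
#   }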
1037

    
1038

    
1039
def _NICListToTuple(lu, nics):
1040
  """Build a list of nic information tuples.
1041

1042
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1043
  value in LUInstanceQueryData.
1044

1045
  @type lu:  L{LogicalUnit}
1046
  @param lu: the logical unit on whose behalf we execute
1047
  @type nics: list of L{objects.NIC}
1048
  @param nics: list of nics to convert to hooks tuples
1049

1050
  """
1051
  hooks_nics = []
1052
  cluster = lu.cfg.GetClusterInfo()
1053
  for nic in nics:
1054
    ip = nic.ip
1055
    mac = nic.mac
1056
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1057
    mode = filled_params[constants.NIC_MODE]
1058
    link = filled_params[constants.NIC_LINK]
1059
    hooks_nics.append((ip, mac, mode, link))
1060
  return hooks_nics
1061

    
1062

    
1063
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1064
  """Builds instance related env variables for hooks from an object.
1065

1066
  @type lu: L{LogicalUnit}
1067
  @param lu: the logical unit on whose behalf we execute
1068
  @type instance: L{objects.Instance}
1069
  @param instance: the instance for which we should build the
1070
      environment
1071
  @type override: dict
1072
  @param override: dictionary with key/values that will override
1073
      our values
1074
  @rtype: dict
1075
  @return: the hook environment dictionary
1076

1077
  """
1078
  cluster = lu.cfg.GetClusterInfo()
1079
  bep = cluster.FillBE(instance)
1080
  hvp = cluster.FillHV(instance)
1081
  args = {
1082
    "name": instance.name,
1083
    "primary_node": instance.primary_node,
1084
    "secondary_nodes": instance.secondary_nodes,
1085
    "os_type": instance.os,
1086
    "status": instance.admin_up,
1087
    "memory": bep[constants.BE_MEMORY],
1088
    "vcpus": bep[constants.BE_VCPUS],
1089
    "nics": _NICListToTuple(lu, instance.nics),
1090
    "disk_template": instance.disk_template,
1091
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1092
    "bep": bep,
1093
    "hvp": hvp,
1094
    "hypervisor_name": instance.hypervisor,
1095
    "tags": instance.tags,
1096
  }
1097
  if override:
1098
    args.update(override)
1099
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1100

    
1101

    
1102
def _AdjustCandidatePool(lu, exceptions):
1103
  """Adjust the candidate pool after node operations.
1104

1105
  """
1106
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1107
  if mod_list:
1108
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1109
               utils.CommaJoin(node.name for node in mod_list))
1110
    for name in mod_list:
1111
      lu.context.ReaddNode(name)
1112
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1113
  if mc_now > mc_max:
1114
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1115
               (mc_now, mc_max))
1116

    
1117

    
1118
def _DecideSelfPromotion(lu, exceptions=None):
1119
  """Decide whether I should promote myself as a master candidate.
1120

1121
  """
1122
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1123
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1124
  # the new node will increase mc_max by one, so:
1125
  mc_should = min(mc_should + 1, cp_size)
1126
  return mc_now < mc_should
1127

    
1128

    
1129
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1130
  """Check that the bridges needed by a list of nics exist.
1131

1132
  """
1133
  cluster = lu.cfg.GetClusterInfo()
1134
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1135
  brlist = [params[constants.NIC_LINK] for params in paramslist
1136
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1137
  if brlist:
1138
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1139
    result.Raise("Error checking bridges on destination node '%s'" %
1140
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1141

    
1142

    
1143
def _CheckInstanceBridgesExist(lu, instance, node=None):
1144
  """Check that the bridges needed by an instance exist.
1145

1146
  """
1147
  if node is None:
1148
    node = instance.primary_node
1149
  _CheckNicsBridgesExist(lu, instance.nics, node)
1150

    
1151

    
1152
def _CheckOSVariant(os_obj, name):
1153
  """Check whether an OS name conforms to the os variants specification.
1154

1155
  @type os_obj: L{objects.OS}
1156
  @param os_obj: OS object to check
1157
  @type name: string
1158
  @param name: OS name passed by the user, to check for validity
1159

1160
  """
1161
  variant = objects.OS.GetVariant(name)
1162
  if not os_obj.supported_variants:
1163
    if variant:
1164
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1165
                                 " passed)" % (os_obj.name, variant),
1166
                                 errors.ECODE_INVAL)
1167
    return
1168
  if not variant:
1169
    raise errors.OpPrereqError("OS name must include a variant",
1170
                               errors.ECODE_INVAL)
1171

    
1172
  if variant not in os_obj.supported_variants:
1173
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1174

    
1175

    
1176
def _GetNodeInstancesInner(cfg, fn):
1177
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1178

    
1179

    
1180
def _GetNodeInstances(cfg, node_name):
1181
  """Returns a list of all primary and secondary instances on a node.
1182

1183
  """
1184

    
1185
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1186

    
1187

    
1188
def _GetNodePrimaryInstances(cfg, node_name):
1189
  """Returns primary instances on a node.
1190

1191
  """
1192
  return _GetNodeInstancesInner(cfg,
1193
                                lambda inst: node_name == inst.primary_node)
1194

    
1195

    
1196
def _GetNodeSecondaryInstances(cfg, node_name):
1197
  """Returns secondary instances on a node.
1198

1199
  """
1200
  return _GetNodeInstancesInner(cfg,
1201
                                lambda inst: node_name in inst.secondary_nodes)
1202

    
1203

    
1204
def _GetStorageTypeArgs(cfg, storage_type):
1205
  """Returns the arguments for a storage type.
1206

1207
  """
1208
  # Special case for file storage
1209
  if storage_type == constants.ST_FILE:
1210
    # storage.FileStorage wants a list of storage directories
1211
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1212

    
1213
  return []
1214

    
1215

    
1216
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1217
  faulty = []
1218

    
1219
  for dev in instance.disks:
1220
    cfg.SetDiskID(dev, node_name)
1221

    
1222
  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1223
  result.Raise("Failed to get disk status from node %s" % node_name,
1224
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1225

    
1226
  for idx, bdev_status in enumerate(result.payload):
1227
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1228
      faulty.append(idx)
1229

    
1230
  return faulty
1231

    
1232

    
1233
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1234
  """Check the sanity of iallocator and node arguments and use the
1235
  cluster-wide iallocator if appropriate.
1236

1237
  Check that at most one of (iallocator, node) is specified. If none is
1238
  specified, then the LU's opcode's iallocator slot is filled with the
1239
  cluster-wide default iallocator.
1240

1241
  @type iallocator_slot: string
1242
  @param iallocator_slot: the name of the opcode iallocator slot
1243
  @type node_slot: string
1244
  @param node_slot: the name of the opcode target node slot
1245

1246
  """
1247
  node = getattr(lu.op, node_slot, None)
1248
  iallocator = getattr(lu.op, iallocator_slot, None)
1249

    
1250
  if node is not None and iallocator is not None:
1251
    raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1252
                               errors.ECODE_INVAL)
1253
  elif node is None and iallocator is None:
1254
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1255
    if default_iallocator:
1256
      setattr(lu.op, iallocator_slot, default_iallocator)
1257
    else:
1258
      raise errors.OpPrereqError("No iallocator or node given and no"
1259
                                 " cluster-wide default iallocator found;"
1260
                                 " please specify either an iallocator or a"
1261
                                 " node, or set a cluster-wide default"
1262
                                 " iallocator")
1263
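
# Editor's note (illustrative, not in the original): callers pass the names of
# their own opcode slots, e.g.:
#
#   _CheckIAllocatorOrNode(self, "iallocator", "remote_node")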

    
1264

    
1265
def _GetDefaultIAllocator(cfg, iallocator):
1266
  """Decides on which iallocator to use.
1267

1268
  @type cfg: L{config.ConfigWriter}
1269
  @param cfg: Cluster configuration object
1270
  @type iallocator: string or None
1271
  @param iallocator: Iallocator specified in opcode
1272
  @rtype: string
1273
  @return: Iallocator name
1274

1275
  """
1276
  if not iallocator:
1277
    # Use default iallocator
1278
    iallocator = cfg.GetDefaultIAllocator()
1279

    
1280
  if not iallocator:
1281
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1282
                               " opcode nor as a cluster-wide default",
1283
                               errors.ECODE_INVAL)
1284

    
1285
  return iallocator
1286

    
1287

    
1288
class LUClusterPostInit(LogicalUnit):
1289
  """Logical unit for running hooks after cluster initialization.
1290

1291
  """
1292
  HPATH = "cluster-init"
1293
  HTYPE = constants.HTYPE_CLUSTER
1294

    
1295
  def BuildHooksEnv(self):
1296
    """Build hooks env.
1297

1298
    """
1299
    return {
1300
      "OP_TARGET": self.cfg.GetClusterName(),
1301
      }
1302

    
1303
  def BuildHooksNodes(self):
1304
    """Build hooks nodes.
1305

1306
    """
1307
    return ([], [self.cfg.GetMasterNode()])
1308

    
1309
  def Exec(self, feedback_fn):
1310
    """Nothing to do.
1311

1312
    """
1313
    return True
1314

    
1315

    
1316
class LUClusterDestroy(LogicalUnit):
1317
  """Logical unit for destroying the cluster.
1318

1319
  """
1320
  HPATH = "cluster-destroy"
1321
  HTYPE = constants.HTYPE_CLUSTER
1322

    
1323
  def BuildHooksEnv(self):
1324
    """Build hooks env.
1325

1326
    """
1327
    return {
1328
      "OP_TARGET": self.cfg.GetClusterName(),
1329
      }
1330

    
1331
  def BuildHooksNodes(self):
1332
    """Build hooks nodes.
1333

1334
    """
1335
    return ([], [])
1336

    
1337
  def CheckPrereq(self):
1338
    """Check prerequisites.
1339

1340
    This checks whether the cluster is empty.
1341

1342
    Any errors are signaled by raising errors.OpPrereqError.
1343

1344
    """
1345
    master = self.cfg.GetMasterNode()
1346

    
1347
    nodelist = self.cfg.GetNodeList()
1348
    if len(nodelist) != 1 or nodelist[0] != master:
1349
      raise errors.OpPrereqError("There are still %d node(s) in"
1350
                                 " this cluster." % (len(nodelist) - 1),
1351
                                 errors.ECODE_INVAL)
1352
    instancelist = self.cfg.GetInstanceList()
1353
    if instancelist:
1354
      raise errors.OpPrereqError("There are still %d instance(s) in"
1355
                                 " this cluster." % len(instancelist),
1356
                                 errors.ECODE_INVAL)
1357

    
1358
  def Exec(self, feedback_fn):
1359
    """Destroys the cluster.
1360

1361
    """
1362
    master_params = self.cfg.GetMasterNetworkParameters()
1363

    
1364
    # Run post hooks on master node before it's removed
1365
    _RunPostHook(self, master_params.name)
1366

    
1367
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1368
                                                     master_params)
1369
    result.Raise("Could not disable the master role")
1370

    
1371
    return master_params.name
1372

    
1373

    
1374
def _VerifyCertificate(filename):
1375
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1376

1377
  @type filename: string
1378
  @param filename: Path to PEM file
1379

1380
  """
1381
  try:
1382
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1383
                                           utils.ReadFile(filename))
1384
  except Exception, err: # pylint: disable=W0703
1385
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1386
            "Failed to load X509 certificate %s: %s" % (filename, err))
1387

    
1388
  (errcode, msg) = \
1389
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1390
                                constants.SSL_CERT_EXPIRATION_ERROR)
1391

    
1392
  if msg:
1393
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1394
  else:
1395
    fnamemsg = None
1396

    
1397
  if errcode is None:
1398
    return (None, fnamemsg)
1399
  elif errcode == utils.CERT_WARNING:
1400
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1401
  elif errcode == utils.CERT_ERROR:
1402
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1403

    
1404
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1405

    
1406

    
1407
def _GetAllHypervisorParameters(cluster, instances):
1408
  """Compute the set of all hypervisor parameters.
1409

1410
  @type cluster: L{objects.Cluster}
1411
  @param cluster: the cluster object
1412
  @type instances: list of L{objects.Instance}
1413
  @param instances: additional instances from which to obtain parameters
1414
  @rtype: list of (origin, hypervisor, parameters)
1415
  @return: a list with all parameters found, indicating the hypervisor they
1416
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1417

1418
  """
1419
  hvp_data = []
1420

    
1421
  for hv_name in cluster.enabled_hypervisors:
1422
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1423

    
1424
  for os_name, os_hvp in cluster.os_hvp.items():
1425
    for hv_name, hv_params in os_hvp.items():
1426
      if hv_params:
1427
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1428
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1429

    
1430
  # TODO: collapse identical parameter values in a single one
1431
  for instance in instances:
1432
    if instance.hvparams:
1433
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1434
                       cluster.FillHV(instance)))
1435

    
1436
  return hvp_data
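
# Editor's note: entries in the returned list look like the following
# (hypothetical values, not from the original module):
#
#   [("cluster", "xen-pvm", {...}),
#    ("os debian-installer", "xen-pvm", {...}),
#    ("instance inst1.example.com", "xen-pvm", {...})]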
1437

    
1438

    
1439
class _VerifyErrors(object):
1440
  """Mix-in for cluster/group verify LUs.
1441

1442
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1443
  self.op and self._feedback_fn to be available.)
1444

1445
  """
1446

    
1447
  ETYPE_FIELD = "code"
1448
  ETYPE_ERROR = "ERROR"
1449
  ETYPE_WARNING = "WARNING"
1450

    
1451
  def _Error(self, ecode, item, msg, *args, **kwargs):
1452
    """Format an error message.
1453

1454
    Based on the opcode's error_codes parameter, either format a
1455
    parseable error code, or a simpler error string.
1456

1457
    This must be called only from Exec and functions called from Exec.
1458

1459
    """
1460
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1461
    itype, etxt, _ = ecode
1462
    # first complete the msg
1463
    if args:
1464
      msg = msg % args
1465
    # then format the whole message
1466
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1467
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1468
    else:
1469
      if item:
1470
        item = " " + item
1471
      else:
1472
        item = ""
1473
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1474
    # and finally report it via the feedback_fn
1475
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1476

    
1477
  def _ErrorIf(self, cond, ecode, *args, **kwargs):
1478
    """Log an error message if the passed condition is True.
1479

1480
    """
1481
    cond = (bool(cond)
1482
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1483

    
1484
    # If the error code is in the list of ignored errors, demote the error to a
1485
    # warning
1486
    (_, etxt, _) = ecode
1487
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1488
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1489

    
1490
    if cond:
1491
      self._Error(ecode, *args, **kwargs)
1492

    
1493
    # do not mark the operation as failed for WARN cases only
1494
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1495
      self.bad = self.bad or cond
1496

    
1497

    
1498
class LUClusterVerify(NoHooksLU):
1499
  """Submits all jobs necessary to verify the cluster.
1500

1501
  """
1502
  REQ_BGL = False
1503

    
1504
  def ExpandNames(self):
1505
    self.needed_locks = {}
1506

    
1507
  def Exec(self, feedback_fn):
1508
    jobs = []
1509

    
1510
    if self.op.group_name:
1511
      groups = [self.op.group_name]
1512
      depends_fn = lambda: None
1513
    else:
1514
      groups = self.cfg.GetNodeGroupList()
1515

    
1516
      # Verify global configuration
1517
      jobs.append([
1518
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1519
        ])
1520

    
1521
      # Always depend on global verification
1522
      depends_fn = lambda: [(-len(jobs), [])]
1523

    
1524
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1525
                                            ignore_errors=self.op.ignore_errors,
1526
                                            depends=depends_fn())]
1527
                for group in groups)
1528

    
1529
    # Fix up all parameters
1530
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1531
      op.debug_simulate_errors = self.op.debug_simulate_errors
1532
      op.verbose = self.op.verbose
1533
      op.error_codes = self.op.error_codes
1534
      try:
1535
        op.skip_checks = self.op.skip_checks
1536
      except AttributeError:
1537
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1538

    
1539
    return ResultWithJobs(jobs)
1540

    
1541

    
1542
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1543
  """Verifies the cluster config.
1544

1545
  """
1546
  REQ_BGL = True
1547

    
1548
  def _VerifyHVP(self, hvp_data):
1549
    """Verifies locally the syntax of the hypervisor parameters.
1550

1551
    """
1552
    for item, hv_name, hv_params in hvp_data:
1553
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1554
             (hv_name, item))
1555
      try:
1556
        hv_class = hypervisor.GetHypervisor(hv_name)
1557
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1558
        hv_class.CheckParameterSyntax(hv_params)
1559
      except errors.GenericError, err:
1560
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1561
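
  # Sketch of the hvp_data argument consumed above, as assembled by
  # _GetAllHypervisorParameters(): a list of
  #   (source description, hypervisor name, filled parameter dict)
  # triples, e.g. ("cluster", "xen-pvm", {...}) or ("instance inst1", ...);
  # the concrete source strings and hypervisor names are only illustrative.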

    
  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, constants.CV_ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  constants.CV_ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, constants.CV_ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, constants.CV_ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
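
  # Worked example (assumed skew limit): with NODE_MAX_CLOCK_SKEW at, say,
  # 150 seconds, a node whose merged time is 200 seconds older than
  # nvinfo_starttime is reported with ntime_diff = "200.0s"; anything inside
  # the [starttime - skew, endtime + skew] window passes silently.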

    
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, constants.CV_ENODELVM, node,
                 "Invalid character ':' in PV '%s' of VG '%s'",
                 pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, constants.CV_ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))

  def _VerifyNodeUserScripts(self, ninfo, nresult):
    """Check the results of user scripts presence and executability on the node

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name

    test = not constants.NV_USERSCRIPTS in nresult
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
                  "did not return user scripts information")

    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
    if not test:
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
                    "user scripts not present or not executable: %s" %
                    utils.CommaJoin(sorted(broken_scripts)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, constants.CV_ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, constants.CV_ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, constants.CV_ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, constants.CV_ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, constants.CV_ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)
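
  # Sketch: if node B is secondary for instances inst1 and inst2 whose
  # primary is node A, then B's sbp is {"A": ["inst1", "inst2"]} and the
  # check above requires B's reported free memory to cover the summed
  # BE_MEMORY of the auto-balanced instances (e.g. 2 * 1024 MiB), so that
  # they could be failed over to B should A die. Names and sizes assumed.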

    
  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_mc | files_vm)

    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_opt:
        # All or no nodes
        errorif(missing_file and missing_file != expected_nodes,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))
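
  # Sketch of the intermediate fileinfo mapping built above (file name and
  # checksums assumed): a diverging file ends up as
  #   {"/var/lib/ganeti/known_hosts":
  #      {"1234...": set(["node1", "node2"]), "abcd...": set(["node3"])}}
  # and is then reported as two different checksum variants of that file.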

    
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)
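
  # Sketch (assumed minors/instances): drbd_map[node] maps minors to instance
  # names, e.g. {0: "inst1", 1: "inst2"}; node_drbd above then becomes
  # {0: ("inst1", True), 1: ("inst2", False)}, where the boolean is the
  # instance's admin_up flag, i.e. whether that minor must be active.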

    
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
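
  # Sketch of the resulting nimg.oslist (OS name, path and variants assumed):
  #   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
  #                     set(["default"]), set(), set([20]))]}
  # i.e. per OS name a list of (path, status, diagnose message, variants,
  # parameters, API versions) tuples, one entry per occurrence on the node.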

    
  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, constants.CV_ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, constants.CV_ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, constants.CV_ENODELVM, node,
               "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                  "rpc call to node failed (instancelist): %s",
                  utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, constants.CV_ENODEHV, node,
             "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, constants.CV_ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
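
  # Sketch of the returned mapping (names assumed) for a one-disk DRBD
  # instance on node1/node2 plus a diskless instance:
  #   {"inst1": {"node1": [(True, status0)], "node2": [(True, status0)]},
  #    "diskless1": {}}
  # i.e. per instance, per node, one (success, payload) pair per disk.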

    
  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])

  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))
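
  # Sketch (group and node names assumed): for a group with online nodes
  # ["a1", "a2"] in a cluster that also has groups with nodes ["b1", "b2"]
  # and ["c1"], the result is roughly
  #   (["a1", "a2"], {"a1": ["b1", "c1"], "a2": ["b2", "c1"]})
  # i.e. every node later SSH-checks its whole group plus one node from each
  # other group, cycling through the other groups' members.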

    
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)
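
  # Sketch of the resulting hooks environment (tag values assumed):
  #   {"CLUSTER_TAGS": "prod web",
  #    "NODE_TAGS_node1.example.com": "rack1",
  #    "NODE_TAGS_node2.example.com": "rack2"}
  # BuildHooksNodes() then runs the hook on the group's nodes only, and only
  # in the post phase.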

    
2631
  def Exec(self, feedback_fn):
2632
    """Verify integrity of the node group, performing various test on nodes.
2633

2634
    """
2635
    # This method has too many local variables. pylint: disable=R0914
2636
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2637

    
2638
    if not self.my_node_names:
2639
      # empty node group
2640
      feedback_fn("* Empty node group, skipping verification")
2641
      return True
2642

    
2643
    self.bad = False
2644
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2645
    verbose = self.op.verbose
2646
    self._feedback_fn = feedback_fn
2647

    
2648
    vg_name = self.cfg.GetVGName()
2649
    drbd_helper = self.cfg.GetDRBDHelper()
2650
    cluster = self.cfg.GetClusterInfo()
2651
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2652
    hypervisors = cluster.enabled_hypervisors
2653
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2654

    
2655
    i_non_redundant = [] # Non redundant instances
2656
    i_non_a_balanced = [] # Non auto-balanced instances
2657
    n_offline = 0 # Count of offline nodes
2658
    n_drained = 0 # Count of nodes being drained
2659
    node_vol_should = {}
2660

    
2661
    # FIXME: verify OS list
2662

    
2663
    # File verification
2664
    filemap = _ComputeAncillaryFiles(cluster, False)
2665

    
2666
    # do local checksums
2667
    master_node = self.master_node = self.cfg.GetMasterNode()
2668
    master_ip = self.cfg.GetMasterIP()
2669

    
2670
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2671

    
2672
    user_scripts = []
2673
    if self.cfg.GetUseExternalMipScript():
2674
      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2675

    
2676
    node_verify_param = {
2677
      constants.NV_FILELIST:
2678
        utils.UniqueSequence(filename
2679
                             for files in filemap
2680
                             for filename in files),
2681
      constants.NV_NODELIST:
2682
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2683
                                  self.all_node_info.values()),
2684
      constants.NV_HYPERVISOR: hypervisors,
2685
      constants.NV_HVPARAMS:
2686
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2687
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2688
                                 for node in node_data_list
2689
                                 if not node.offline],
2690
      constants.NV_INSTANCELIST: hypervisors,
2691
      constants.NV_VERSION: None,
2692
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2693
      constants.NV_NODESETUP: None,
2694
      constants.NV_TIME: None,
2695
      constants.NV_MASTERIP: (master_node, master_ip),
2696
      constants.NV_OSLIST: None,
2697
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2698
      constants.NV_USERSCRIPTS: user_scripts,
2699
      }
2700

    
2701
    if vg_name is not None:
2702
      node_verify_param[constants.NV_VGLIST] = None
2703
      node_verify_param[constants.NV_LVLIST] = vg_name
2704
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2705
      node_verify_param[constants.NV_DRBDLIST] = None
2706

    
2707
    if drbd_helper:
2708
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2709

    
2710
    # bridge checks
2711
    # FIXME: this needs to be changed per node-group, not cluster-wide
2712
    bridges = set()
2713
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2714
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2715
      bridges.add(default_nicpp[constants.NIC_LINK])
2716
    for instance in self.my_inst_info.values():
2717
      for nic in instance.nics:
2718
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2719
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2720
          bridges.add(full_nic[constants.NIC_LINK])
2721

    
2722
    if bridges:
2723
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2724

    
2725
    # Build our expected cluster state
2726
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2727
                                                 name=node.name,
2728
                                                 vm_capable=node.vm_capable))
2729
                      for node in node_data_list)
2730

    
2731
    # Gather OOB paths
2732
    oob_paths = []
2733
    for node in self.all_node_info.values():
2734
      path = _SupportsOob(self.cfg, node)
2735
      if path and path not in oob_paths:
2736
        oob_paths.append(path)
2737

    
2738
    if oob_paths:
2739
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2740

    
2741
    for instance in self.my_inst_names:
2742
      inst_config = self.my_inst_info[instance]
2743

    
2744
      for nname in inst_config.all_nodes:
2745
        if nname not in node_image:
2746
          gnode = self.NodeImage(name=nname)
2747
          gnode.ghost = (nname not in self.all_node_info)
2748
          node_image[nname] = gnode
2749

    
2750
      inst_config.MapLVsByNode(node_vol_should)
2751

    
2752
      pnode = inst_config.primary_node
2753
      node_image[pnode].pinst.append(instance)
2754

    
2755
      for snode in inst_config.secondary_nodes:
2756
        nimg = node_image[snode]
2757
        nimg.sinst.append(instance)
2758
        if pnode not in nimg.sbp:
2759
          nimg.sbp[pnode] = []
2760
        nimg.sbp[pnode].append(instance)
2761

    
2762
    # At this point, we have the in-memory data structures complete,
2763
    # except for the runtime information, which we'll gather next
2764

    
2765
    # Due to the way our RPC system works, exact response times cannot be
2766
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2767
    # time before and after executing the request, we can at least have a time
2768
    # window.
2769
    nvinfo_starttime = time.time()
2770
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2771
                                           node_verify_param,
2772
                                           self.cfg.GetClusterName())
2773
    nvinfo_endtime = time.time()
2774

    
2775
    if self.extra_lv_nodes and vg_name is not None:
2776
      extra_lv_nvinfo = \
2777
          self.rpc.call_node_verify(self.extra_lv_nodes,
2778
                                    {constants.NV_LVLIST: vg_name},
2779
                                    self.cfg.GetClusterName())
2780
    else:
2781
      extra_lv_nvinfo = {}
2782

    
2783
    all_drbd_map = self.cfg.ComputeDRBDMap()
2784

    
2785
    feedback_fn("* Gathering disk information (%s nodes)" %
2786
                len(self.my_node_names))
2787
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2788
                                     self.my_inst_info)
2789

    
2790
    feedback_fn("* Verifying configuration file consistency")
2791

    
2792
    # If not all nodes are being checked, we need to make sure the master node
2793
    # and a non-checked vm_capable node are in the list.
2794
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2795
    if absent_nodes:
2796
      vf_nvinfo = all_nvinfo.copy()
2797
      vf_node_info = list(self.my_node_info.values())
2798
      additional_nodes = []
2799
      if master_node not in self.my_node_info:
2800
        additional_nodes.append(master_node)
2801
        vf_node_info.append(self.all_node_info[master_node])
2802
      # Add the first vm_capable node we find which is not included
2803
      for node in absent_nodes:
2804
        nodeinfo = self.all_node_info[node]
2805
        if nodeinfo.vm_capable and not nodeinfo.offline:
2806
          additional_nodes.append(node)
2807
          vf_node_info.append(self.all_node_info[node])
2808
          break
2809
      key = constants.NV_FILELIST
2810
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2811
                                                 {key: node_verify_param[key]},
2812
                                                 self.cfg.GetClusterName()))
2813
    else:
2814
      vf_nvinfo = all_nvinfo
2815
      vf_node_info = self.my_node_info.values()
2816

    
2817
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2818

    
2819
    feedback_fn("* Verifying node status")
2820

    
2821
    refos_img = None
2822

    
2823
    for node_i in node_data_list:
2824
      node = node_i.name
2825
      nimg = node_image[node]
2826

    
2827
      if node_i.offline:
2828
        if verbose:
2829
          feedback_fn("* Skipping offline node %s" % (node,))
2830
        n_offline += 1
2831
        continue
2832

    
2833
      if node == master_node:
2834
        ntype = "master"
2835
      elif node_i.master_candidate:
2836
        ntype = "master candidate"
2837
      elif node_i.drained:
2838
        ntype = "drained"
2839
        n_drained += 1
2840
      else:
2841
        ntype = "regular"
2842
      if verbose:
2843
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2844

    
2845
      msg = all_nvinfo[node].fail_msg
2846
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2847
               msg)
2848
      if msg:
2849
        nimg.rpc_fail = True
2850
        continue
2851

    
2852
      nresult = all_nvinfo[node].payload
2853

    
2854
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2855
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2856
      self._VerifyNodeNetwork(node_i, nresult)
2857
      self._VerifyNodeUserScripts(node_i, nresult)
2858
      self._VerifyOob(node_i, nresult)
2859

    
2860
      if nimg.vm_capable:
2861
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2862
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2863
                             all_drbd_map)
2864

    
2865
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2866
        self._UpdateNodeInstances(node_i, nresult, nimg)
2867
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2868
        self._UpdateNodeOS(node_i, nresult, nimg)
2869

    
2870
        if not nimg.os_fail:
2871
          if refos_img is None:
2872
            refos_img = nimg
2873
          self._VerifyNodeOS(node_i, nimg, refos_img)
2874
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2880

    
2881
        for inst in non_primary_inst:
2882
          test = inst in self.all_inst_info
2883
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2884
                   "instance should not run on node %s", node_i.name)
2885
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2886
                   "node is running unknown instance %s", inst)
2887

    
2888
    for node, result in extra_lv_nvinfo.items():
2889
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2890
                              node_image[node], vg_name)
    feedback_fn("* Verifying instance status")
2893
    for instance in self.my_inst_names:
2894
      if verbose:
2895
        feedback_fn("* Verifying instance %s" % instance)
2896
      inst_config = self.my_inst_info[instance]
2897
      self._VerifyInstance(instance, inst_config, node_image,
2898
                           instdisk[instance])
2899
      inst_nodes_offline = []
2900

    
2901
      pnode = inst_config.primary_node
2902
      pnode_img = node_image[pnode]
2903
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2904
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
2905
               " primary node failed", instance)
2906

    
2907
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2908
               constants.CV_EINSTANCEBADNODE, instance,
2909
               "instance is marked as running and lives on offline node %s",
2910
               inst_config.primary_node)
2911

    
2912
      # If the instance is non-redundant we cannot survive losing its primary
2913
      # node, so we are not N+1 compliant. On the other hand we have no disk
2914
      # templates with more than one secondary so that situation is not well
2915
      # supported either.
2916
      # FIXME: does not support file-backed instances
2917
      if not inst_config.secondary_nodes:
2918
        i_non_redundant.append(instance)
2919

    
2920
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
2921
               constants.CV_EINSTANCELAYOUT,
2922
               instance, "instance has multiple secondary nodes: %s",
2923
               utils.CommaJoin(inst_config.secondary_nodes),
2924
               code=self.ETYPE_WARNING)
2925

    
2926
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2927
        pnode = inst_config.primary_node
2928
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2929
        instance_groups = {}
2930

    
2931
        for node in instance_nodes:
2932
          instance_groups.setdefault(self.all_node_info[node].group,
2933
                                     []).append(node)
2934

    
2935
        pretty_list = [
2936
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2937
          # Sort so that we always list the primary node first.
2938
          for group, nodes in sorted(instance_groups.items(),
2939
                                     key=lambda (_, nodes): pnode in nodes,
2940
                                     reverse=True)]
2941

    
2942
        self._ErrorIf(len(instance_groups) > 1,
2943
                      constants.CV_EINSTANCESPLITGROUPS,
2944
                      instance, "instance has primary and secondary nodes in"
2945
                      " different groups: %s", utils.CommaJoin(pretty_list),
2946
                      code=self.ETYPE_WARNING)
2947

    
2948
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2949
        i_non_a_balanced.append(instance)
2950

    
2951
      for snode in inst_config.secondary_nodes:
2952
        s_img = node_image[snode]
2953
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2954
                 snode, "instance %s, connection to secondary node failed",
2955
                 instance)
2956

    
2957
        if s_img.offline:
2958
          inst_nodes_offline.append(snode)
2959

    
2960
      # warn that the instance lives on offline nodes
2961
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2962
               "instance has offline secondary node(s) %s",
2963
               utils.CommaJoin(inst_nodes_offline))
2964
      # ... or ghost/non-vm_capable nodes
2965
      for node in inst_config.all_nodes:
2966
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2967
                 instance, "instance lives on ghost node %s", node)
2968
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2969
                 instance, "instance lives on non-vm_capable node %s", node)
    feedback_fn("* Verifying orphan volumes")
2972
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2973

    
2974
    # We will get spurious "unknown volume" warnings if any node of this group
2975
    # is secondary for an instance whose primary is in another group. To avoid
2976
    # them, we find these instances and add their volumes to node_vol_should.
2977
    for inst in self.all_inst_info.values():
2978
      for secondary in inst.secondary_nodes:
2979
        if (secondary in self.my_node_info
2980
            and inst.name not in self.my_inst_info):
2981
          inst.MapLVsByNode(node_vol_should)
2982
          break
2983

    
2984
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2985

    
2986
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2987
      feedback_fn("* Verifying N+1 Memory redundancy")
2988
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2989

    
2990
    feedback_fn("* Other Notes")
2991
    if i_non_redundant:
2992
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2993
                  % len(i_non_redundant))
2994

    
2995
    if i_non_a_balanced:
2996
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2997
                  % len(i_non_a_balanced))
2998

    
2999
    if n_offline:
3000
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3001

    
3002
    if n_drained:
3003
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
3022
    # We only really run POST phase hooks, only for non-empty groups,
3023
    # and are only interested in their results
3024
    if not self.my_node_names:
3025
      # empty node group
3026
      pass
3027
    elif phase == constants.HOOKS_PHASE_POST:
3028
      # Used to change hooks' output to proper indentation
3029
      feedback_fn("* Hooks Results")
3030
      assert hooks_results, "invalid result from hooks"
3031

    
3032
      for node_name in hooks_results:
3033
        res = hooks_results[node_name]
3034
        msg = res.fail_msg
3035
        test = msg and not res.offline
3036
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3037
                      "Communication failure in hooks execution: %s", msg)
3038
        if res.offline or msg:
3039
          # No need to investigate payload if node is offline or gave
3040
          # an error.
3041
          continue
3042
        for script, hkr, output in res.payload:
3043
          test = hkr == constants.HKR_FAIL
3044
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3045
                        "Script %s failed, output:", script)
3046
          if test:
3047
            output = self._HOOKS_INDENT_RE.sub("      ", output)
3048
            feedback_fn("%s" % output)
3049
            lu_result = False
3050

    
3051
    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])
3072

    
3073

    
3074
class LUGroupVerifyDisks(NoHooksLU):
3075
  """Verifies the status of all disks in a node group.
3076

3077
  """
3078
  REQ_BGL = False
3079

    
3080
  def ExpandNames(self):
3081
    # Raises errors.OpPrereqError on its own if group can't be found
3082
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3083

    
3084
    self.share_locks = _ShareAll()
3085
    self.needed_locks = {
3086
      locking.LEVEL_INSTANCE: [],
3087
      locking.LEVEL_NODEGROUP: [],
3088
      locking.LEVEL_NODE: [],
3089
      }
3090

    
3091
  def DeclareLocks(self, level):
3092
    if level == locking.LEVEL_INSTANCE:
3093
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3094

    
3095
      # Lock instances optimistically, needs verification once node and group
3096
      # locks have been acquired
3097
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3098
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3099

    
3100
    elif level == locking.LEVEL_NODEGROUP:
3101
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3102

    
3103
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3104
        set([self.group_uuid] +
3105
            # Lock all groups used by instances optimistically; this requires
3106
            # going via the node before it's locked, requiring verification
3107
            # later on
3108
            [group_uuid
3109
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3110
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3111

    
3112
    elif level == locking.LEVEL_NODE:
3113
      # This will only lock the nodes in the group to be verified which contain
3114
      # actual instances
3115
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3116
      self._LockInstancesNodes()
3117

    
3118
      # Lock all nodes in group to be verified
3119
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3120
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3121
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3122

    
3123
  def CheckPrereq(self):
3124
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3125
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3126
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3127

    
3128
    assert self.group_uuid in owned_groups
3129

    
3130
    # Check if locked instances are still correct
3131
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3132

    
3133
    # Get instance information
3134
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3135

    
3136
    # Check if node groups for locked instances are still correct
3137
    for (instance_name, inst) in self.instances.items():
3138
      assert owned_nodes.issuperset(inst.all_nodes), \
3139
        "Instance %s's nodes changed while we kept the lock" % instance_name
3140

    
3141
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3142
                                             owned_groups)
3143

    
3144
      assert self.group_uuid in inst_groups, \
3145
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3146

    
3147
  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
3156
    res_nodes = {}
3157
    res_instances = set()
3158
    res_missing = {}
3159

    
3160
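    # map (node, LV name) to the owning instance, for running instances only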
    nv_dict = _MapInstanceDisksToNodes([inst
3161
                                        for inst in self.instances.values()
3162
                                        if inst.admin_up])
3163

    
3164
    if nv_dict:
3165
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3166
                             set(self.cfg.GetVmCapableNodeList()))
3167

    
3168
      node_lvs = self.rpc.call_lv_list(nodes, [])
3169

    
3170
      for (node, node_res) in node_lvs.items():
3171
        if node_res.offline:
3172
          continue
3173

    
3174
        msg = node_res.fail_msg
3175
        if msg:
3176
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3177
          res_nodes[node] = msg
3178
          continue
3179

    
3180
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3181
          inst = nv_dict.pop((node, lv_name), None)
3182
          if not (lv_online or inst is None):
3183
            res_instances.add(inst)
3184

    
3185
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3186
      # better
3187
      for key, inst in nv_dict.iteritems():
3188
        res_missing.setdefault(inst, []).append(list(key))
3189

    
3190
    return (res_nodes, list(res_instances), res_missing)
3191

    
3192

    
3193
class LUClusterRepairDiskSizes(NoHooksLU):
3194
  """Verifies the cluster disks sizes.
3195

3196
  """
3197
  REQ_BGL = False
3198

    
3199
  def ExpandNames(self):
3200
    if self.op.instances:
3201
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3202
      self.needed_locks = {
3203
        locking.LEVEL_NODE: [],
3204
        locking.LEVEL_INSTANCE: self.wanted_names,
3205
        }
3206
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3207
    else:
3208
      self.wanted_names = None
3209
      self.needed_locks = {
3210
        locking.LEVEL_NODE: locking.ALL_SET,
3211
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3212
        }
3213
    self.share_locks = _ShareAll()
3214

    
3215
  def DeclareLocks(self, level):
3216
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3217
      self._LockInstancesNodes(primary_only=True)
3218

    
3219
  def CheckPrereq(self):
3220
    """Check prerequisites.
3221

3222
    This only checks the optional instance list against the existing names.
3223

3224
    """
3225
    if self.wanted_names is None:
3226
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3227

    
3228
    self.wanted_instances = \
3229
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3230

    
3231
  def _EnsureChildSizes(self, disk):
3232
    """Ensure children of the disk have the needed disk size.
3233

3234
    This is valid mainly for DRBD8 and fixes an issue where the
3235
    children have smaller disk size.
3236

3237
    @param disk: an L{ganeti.objects.Disk} object
3238

3239
    """
3240
    if disk.dev_type == constants.LD_DRBD8:
3241
      assert disk.children, "Empty children for DRBD8?"
3242
      fchild = disk.children[0]
3243
      mismatch = fchild.size < disk.size
3244
      if mismatch:
3245
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3246
                     fchild.size, disk.size)
3247
        fchild.size = disk.size
3248

    
3249
      # and we recurse on this child only, not on the metadev
3250
      return self._EnsureChildSizes(fchild) or mismatch
3251
    else:
3252
      return False
3253

    
3254
  def Exec(self, feedback_fn):
3255
    """Verify the size of cluster disks.
3256

3257
    """
3258
    # TODO: check child disks too
3259
    # TODO: check differences in size between primary/secondary nodes
3260
    per_node_disks = {}
3261
    for instance in self.wanted_instances:
3262
      pnode = instance.primary_node
3263
      if pnode not in per_node_disks:
3264
        per_node_disks[pnode] = []
3265
      for idx, disk in enumerate(instance.disks):
3266
        per_node_disks[pnode].append((instance, idx, disk))
3267

    
3268
    changed = []
3269
    for node, dskl in per_node_disks.items():
3270
      newl = [v[2].Copy() for v in dskl]
3271
      for dsk in newl:
3272
        self.cfg.SetDiskID(dsk, node)
3273
      result = self.rpc.call_blockdev_getsize(node, newl)
3274
      if result.fail_msg:
3275
        self.LogWarning("Failure in blockdev_getsize call to node"
3276
                        " %s, ignoring", node)
3277
        continue
3278
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
3284
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3285
        if size is None:
3286
          self.LogWarning("Disk %d of instance %s did not return size"
3287
                          " information, ignoring", idx, instance.name)
3288
          continue
3289
        if not isinstance(size, (int, long)):
3290
          self.LogWarning("Disk %d of instance %s did not return valid"
3291
                          " size information, ignoring", idx, instance.name)
3292
          continue
3293
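        # reported size is in bytes; convert to MiB, the unit of disk.size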
        size = size >> 20
3294
        if size != disk.size:
3295
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3296
                       " correcting: recorded %d, actual %d", idx,
3297
                       instance.name, disk.size, size)
3298
          disk.size = size
3299
          self.cfg.Update(instance, feedback_fn)
3300
          changed.append((instance.name, idx, size))
3301
        if self._EnsureChildSizes(disk):
3302
          self.cfg.Update(instance, feedback_fn)
3303
          changed.append((instance.name, idx, disk.size))
3304
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
3315
    """Build hooks env.
3316

3317
    """
3318
    return {
3319
      "OP_TARGET": self.cfg.GetClusterName(),
3320
      "NEW_NAME": self.op.name,
3321
      }
3322

    
3323
  def BuildHooksNodes(self):
3324
    """Build hooks nodes.
3325

3326
    """
3327
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3328

    
3329
  def CheckPrereq(self):
3330
    """Verify that the passed name is a valid one.
3331

3332
    """
3333
    hostname = netutils.GetHostname(name=self.op.name,
3334
                                    family=self.cfg.GetPrimaryIPFamily())
3335

    
3336
    new_name = hostname.name
3337
    self.ip = new_ip = hostname.ip
3338
    old_name = self.cfg.GetClusterName()
3339
    old_ip = self.cfg.GetMasterIP()
3340
    if new_name == old_name and new_ip == old_ip:
3341
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3342
                                 " cluster has changed",
3343
                                 errors.ECODE_INVAL)
3344
    if new_ip != old_ip:
3345
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3346
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3347
                                   " reachable on the network" %
3348
                                   new_ip, errors.ECODE_NOTUNIQUE)
3349

    
3350
    self.op.name = new_name
3351

    
3352
  def Exec(self, feedback_fn):
3353
    """Rename the cluster.
3354

3355
    """
3356
    clustername = self.op.name
3357
    new_ip = self.ip
3358

    
3359
    # shutdown the master IP
3360
    master_params = self.cfg.GetMasterNetworkParameters()
3361
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3362
                                                     master_params)
3363
    result.Raise("Could not disable the master role")
3364

    
3365
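    # update the configuration and known_hosts while the master IP is down;
    # the finally clause below re-activates it even if the update fails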
    try:
3366
      cluster = self.cfg.GetClusterInfo()
3367
      cluster.cluster_name = clustername
3368
      cluster.master_ip = new_ip
3369
      self.cfg.Update(cluster, feedback_fn)
3370

    
3371
      # update the known hosts file
3372
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3373
      node_list = self.cfg.GetOnlineNodeList()
3374
      try:
3375
        node_list.remove(master_params.name)
3376
      except ValueError:
3377
        pass
3378
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3379
    finally:
3380
      master_params.ip = new_ip
3381
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3382
                                                     master_params)
3383
      msg = result.fail_msg
3384
      if msg:
3385
        self.LogWarning("Could not re-enable the master role on"
3386
                        " the master, please restart manually: %s", msg)
3387

    
3388
    return clustername


def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                                (netmask))


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

3421
    """Check parameters
3422

3423
    """
3424
    if self.op.uid_pool:
3425
      uidpool.CheckUidPool(self.op.uid_pool)
3426

    
3427
    if self.op.add_uids:
3428
      uidpool.CheckUidPool(self.op.add_uids)
3429

    
3430
    if self.op.remove_uids:
3431
      uidpool.CheckUidPool(self.op.remove_uids)
3432

    
3433
    if self.op.master_netmask is not None:
3434
      _ValidateNetmask(self.cfg, self.op.master_netmask)
3435

    
3436
  def ExpandNames(self):
3437
    # FIXME: in the future maybe other cluster params won't require checking on
3438
    # all nodes to be modified.
3439
    self.needed_locks = {
3440
      locking.LEVEL_NODE: locking.ALL_SET,
3441
    }
3442
    self.share_locks[locking.LEVEL_NODE] = 1
3443

    
3444
  def BuildHooksEnv(self):
3445
    """Build hooks env.
3446

3447
    """
3448
    return {
3449
      "OP_TARGET": self.cfg.GetClusterName(),
3450
      "NEW_VG_NAME": self.op.vg_name,
3451
      }
3452

    
3453
  def BuildHooksNodes(self):
3454
    """Build hooks nodes.
3455

3456
    """
3457
    mn = self.cfg.GetMasterNode()
3458
    return ([mn], [mn])
3459

    
3460
  def CheckPrereq(self):
3461
    """Check prerequisites.
3462

3463
    This checks whether the given params don't conflict and
3464
    if the given volume group is valid.
3465

3466
    """
3467
    if self.op.vg_name is not None and not self.op.vg_name:
3468
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3469
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3470
                                   " instances exist", errors.ECODE_INVAL)
3471

    
3472
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3473
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3474
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3475
                                   " drbd-based instances exist",
3476
                                   errors.ECODE_INVAL)
3477

    
3478
    node_list = self.owned_locks(locking.LEVEL_NODE)
3479

    
3480
    # if vg_name not None, checks given volume group on all nodes
3481
    if self.op.vg_name:
3482
      vglist = self.rpc.call_vg_list(node_list)
3483
      for node in node_list:
3484
        msg = vglist[node].fail_msg
3485
        if msg:
3486
          # ignoring down node
3487
          self.LogWarning("Error while gathering data on node %s"
3488
                          " (ignoring node): %s", node, msg)
3489
          continue
3490
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3491
                                              self.op.vg_name,
3492
                                              constants.MIN_VG_SIZE)
3493
        if vgstatus:
3494
          raise errors.OpPrereqError("Error on node '%s': %s" %
3495
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3496

    
3497
    if self.op.drbd_helper:
3498
      # checks given drbd helper on all nodes
3499
      helpers = self.rpc.call_drbd_helper(node_list)
3500
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3501
        if ninfo.offline:
3502
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3503
          continue
3504
        msg = helpers[node].fail_msg
3505
        if msg:
3506
          raise errors.OpPrereqError("Error checking drbd helper on node"
3507
                                     " '%s': %s" % (node, msg),
3508
                                     errors.ECODE_ENVIRON)
3509
        node_helper = helpers[node].payload
3510
        if node_helper != self.op.drbd_helper:
3511
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3512
                                     (node, node_helper), errors.ECODE_ENVIRON)
3513

    
3514
    self.cluster = cluster = self.cfg.GetClusterInfo()
3515
    # validate params changes
3516
    if self.op.beparams:
3517
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3518
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3519

    
3520
    if self.op.ndparams:
3521
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3522
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3523

    
3524
      # TODO: we need a more general way to handle resetting
3525
      # cluster-level parameters to default values
3526
      if self.new_ndparams["oob_program"] == "":
3527
        self.new_ndparams["oob_program"] = \
3528
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3529

    
3530
    if self.op.nicparams:
3531
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3532
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3533
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3534
      nic_errors = []
3535

    
3536
      # check all instances for consistency
3537
      for instance in self.cfg.GetAllInstancesInfo().values():
3538
        for nic_idx, nic in enumerate(instance.nics):
3539
          params_copy = copy.deepcopy(nic.nicparams)
3540
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3541

    
3542
          # check parameter syntax
3543
          try:
3544
            objects.NIC.CheckParameterSyntax(params_filled)
3545
          except errors.ConfigurationError, err:
3546
            nic_errors.append("Instance %s, nic/%d: %s" %
3547
                              (instance.name, nic_idx, err))
3548

    
3549
          # if we're moving instances to routed, check that they have an ip
3550
          target_mode = params_filled[constants.NIC_MODE]
3551
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3552
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3553
                              " address" % (instance.name, nic_idx))
3554
      if nic_errors:
3555
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3556
                                   "\n".join(nic_errors))
3557

    
3558
    # hypervisor list/parameters
3559
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3560
    if self.op.hvparams:
3561
      for hv_name, hv_dict in self.op.hvparams.items():
3562
        if hv_name not in self.new_hvparams:
3563
          self.new_hvparams[hv_name] = hv_dict
3564
        else:
3565
          self.new_hvparams[hv_name].update(hv_dict)
3566

    
3567
    # os hypervisor parameters
3568
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3569
    if self.op.os_hvp:
3570
      for os_name, hvs in self.op.os_hvp.items():
3571
        if os_name not in self.new_os_hvp:
3572
          self.new_os_hvp[os_name] = hvs
3573
        else:
3574
          for hv_name, hv_dict in hvs.items():
3575
            if hv_name not in self.new_os_hvp[os_name]:
3576
              self.new_os_hvp[os_name][hv_name] = hv_dict
3577
            else:
3578
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3579

    
3580
    # os parameters
3581
    self.new_osp = objects.FillDict(cluster.osparams, {})
3582
    if self.op.osparams:
3583
      for os_name, osp in self.op.osparams.items():
3584
        if os_name not in self.new_osp:
3585
          self.new_osp[os_name] = {}
3586

    
3587
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3588
                                                  use_none=True)
3589

    
3590
        if not self.new_osp[os_name]:
3591
          # we removed all parameters
3592
          del self.new_osp[os_name]
3593
        else:
3594
          # check the parameter validity (remote check)
3595
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3596
                         os_name, self.new_osp[os_name])
3597

    
3598
    # changes to the hypervisor list
3599
    if self.op.enabled_hypervisors is not None:
3600
      self.hv_list = self.op.enabled_hypervisors
3601
      for hv in self.hv_list:
3602
        # if the hypervisor doesn't already exist in the cluster
3603
        # hvparams, we initialize it to empty, and then (in both
3604
        # cases) we make sure to fill the defaults, as we might not
3605
        # have a complete defaults list if the hypervisor wasn't
3606
        # enabled before
3607
        if hv not in new_hvp:
3608
          new_hvp[hv] = {}
3609
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3610
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3611
    else:
3612
      self.hv_list = cluster.enabled_hypervisors
3613

    
3614
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3615
      # either the enabled list has changed, or the parameters have, validate
3616
      for hv_name, hv_params in self.new_hvparams.items():
3617
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3618
            (self.op.enabled_hypervisors and
3619
             hv_name in self.op.enabled_hypervisors)):
3620
          # either this is a new hypervisor, or its parameters have changed
3621
          hv_class = hypervisor.GetHypervisor(hv_name)
3622
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3623
          hv_class.CheckParameterSyntax(hv_params)
3624
          _CheckHVParams(self, node_list, hv_name, hv_params)
3625

    
3626
    if self.op.os_hvp:
3627
      # no need to check any newly-enabled hypervisors, since the
3628
      # defaults have already been checked in the above code-block
3629
      for os_name, os_hvp in self.new_os_hvp.items():
3630
        for hv_name, hv_params in os_hvp.items():
3631
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3632
          # we need to fill in the new os_hvp on top of the actual hv_p
3633
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3634
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3635
          hv_class = hypervisor.GetHypervisor(hv_name)
3636
          hv_class.CheckParameterSyntax(new_osp)
3637
          _CheckHVParams(self, node_list, hv_name, new_osp)
3638

    
3639
    if self.op.default_iallocator:
3640
      alloc_script = utils.FindFile(self.op.default_iallocator,
3641
                                    constants.IALLOCATOR_SEARCH_PATH,
3642
                                    os.path.isfile)
3643
      if alloc_script is None:
3644
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3645
                                   " specified" % self.op.default_iallocator,
3646
                                   errors.ECODE_INVAL)
3647

    
3648
  def Exec(self, feedback_fn):
3649
    """Change the parameters of the cluster.
3650

3651
    """
3652
    if self.op.vg_name is not None:
3653
      new_volume = self.op.vg_name
3654
      if not new_volume:
3655
        new_volume = None
3656
      if new_volume != self.cfg.GetVGName():
3657
        self.cfg.SetVGName(new_volume)
3658
      else:
3659
        feedback_fn("Cluster LVM configuration already in desired"
3660
                    " state, not changing")
3661
    if self.op.drbd_helper is not None:
3662
      new_helper = self.op.drbd_helper
3663
      if not new_helper:
3664
        new_helper = None
3665
      if new_helper != self.cfg.GetDRBDHelper():
3666
        self.cfg.SetDRBDHelper(new_helper)
3667
      else:
3668
        feedback_fn("Cluster DRBD helper already in desired state,"
3669
                    " not changing")
3670
    if self.op.hvparams:
3671
      self.cluster.hvparams = self.new_hvparams
3672
    if self.op.os_hvp:
3673
      self.cluster.os_hvp = self.new_os_hvp
3674
    if self.op.enabled_hypervisors is not None:
3675
      self.cluster.hvparams = self.new_hvparams
3676
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3677
    if self.op.beparams:
3678
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3679
    if self.op.nicparams:
3680
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3681
    if self.op.osparams:
3682
      self.cluster.osparams = self.new_osp
3683
    if self.op.ndparams:
3684
      self.cluster.ndparams = self.new_ndparams
3685

    
3686
    if self.op.candidate_pool_size is not None:
3687
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3688
      # we need to update the pool size here, otherwise the save will fail
3689
      _AdjustCandidatePool(self, [])
3690

    
3691
    if self.op.maintain_node_health is not None:
3692
      self.cluster.maintain_node_health = self.op.maintain_node_health
3693

    
3694
    if self.op.prealloc_wipe_disks is not None:
3695
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3696

    
3697
    if self.op.add_uids is not None:
3698
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3699

    
3700
    if self.op.remove_uids is not None:
3701
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3702

    
3703
    if self.op.uid_pool is not None:
3704
      self.cluster.uid_pool = self.op.uid_pool
3705

    
3706
    if self.op.default_iallocator is not None:
3707
      self.cluster.default_iallocator = self.op.default_iallocator
3708

    
3709
    if self.op.reserved_lvs is not None:
3710
      self.cluster.reserved_lvs = self.op.reserved_lvs
3711

    
3712
    if self.op.use_external_mip_script is not None:
3713
      self.cluster.use_external_mip_script = self.op.use_external_mip_script
3714

    
3715
    def helper_os(aname, mods, desc):
3716
      desc += " OS list"
3717
      lst = getattr(self.cluster, aname)
3718
      for key, val in mods:
3719
        if key == constants.DDM_ADD:
3720
          if val in lst:
3721
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3722
          else:
3723
            lst.append(val)
3724
        elif key == constants.DDM_REMOVE:
3725
          if val in lst:
3726
            lst.remove(val)
3727
          else:
3728
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3729
        else:
3730
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3731

    
3732
    if self.op.hidden_os:
3733
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3734

    
3735
    if self.op.blacklisted_os:
3736
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3737

    
3738
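    # changing the master netdev requires taking the master IP down first; it
    # is re-activated on the new netdev after the configuration update below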
    if self.op.master_netdev:
3739
      master_params = self.cfg.GetMasterNetworkParameters()
3740
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3741
                  self.cluster.master_netdev)
3742
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3743
                                                       master_params)
3744
      result.Raise("Could not disable the master ip")
3745
      feedback_fn("Changing master_netdev from %s to %s" %
3746
                  (master_params.netdev, self.op.master_netdev))
3747
      self.cluster.master_netdev = self.op.master_netdev
3748

    
3749
    if self.op.master_netmask:
3750
      master_params = self.cfg.GetMasterNetworkParameters()
3751
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3752
      result = self.rpc.call_node_change_master_netmask(master_params.name,
3753
                                                        master_params.netmask,
3754
                                                        self.op.master_netmask,
3755
                                                        master_params.ip,
3756
                                                        master_params.netdev)
3757
      if result.fail_msg:
3758
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
3759
        feedback_fn(msg)
3760

    
3761
      self.cluster.master_netmask = self.op.master_netmask
3762

    
3763
    self.cfg.Update(self.cluster, feedback_fn)
3764

    
3765
    if self.op.master_netdev:
3766
      master_params = self.cfg.GetMasterNetworkParameters()
3767
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3768
                  self.op.master_netdev)
3769
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3770
                                                     master_params)
3771
      if result.fail_msg:
3772
        self.LogWarning("Could not re-enable the master ip on"
3773
                        " the master, please restart manually: %s",
3774
                        result.fail_msg)
3775

    
3776

    
3777
def _UploadHelper(lu, nodes, fname):
3778
  """Helper for uploading a file and showing warnings.
3779

3780
  """
3781
  if os.path.exists(fname):
3782
    result = lu.rpc.call_upload_file(nodes, fname)
3783
    for to_node, to_result in result.items():
3784
      msg = to_result.fail_msg
3785
      if msg:
3786
        msg = ("Copy of file %s to node %s failed: %s" %
3787
               (fname, to_node, msg))
3788
        lu.proc.LogWarning(msg)
3789

    
3790

    
3791
def _ComputeAncillaryFiles(cluster, redist):
3792
  """Compute files external to Ganeti which need to be consistent.
3793

3794
  @type redist: boolean
3795
  @param redist: Whether to include files which need to be redistributed
3796

3797
  """
3798
  # Compute files for all nodes
3799
  files_all = set([
3800
    constants.SSH_KNOWN_HOSTS_FILE,
3801
    constants.CONFD_HMAC_KEY,
3802
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3803
    constants.SPICE_CERT_FILE,
3804
    constants.SPICE_CACERT_FILE,
3805
    constants.RAPI_USERS_FILE,
3806
    ])
3807

    
3808
  if not redist:
3809
    files_all.update(constants.ALL_CERT_FILES)
3810
    files_all.update(ssconf.SimpleStore().GetFileList())
3811
  else:
3812
    # we need to ship at least the RAPI certificate
3813
    files_all.add(constants.RAPI_CERT_FILE)
3814

    
3815
  if cluster.modify_etc_hosts:
3816
    files_all.add(constants.ETC_HOSTS)
3817

    
3818
  # Files which are optional, these must:
3819
  # - be present in one other category as well
3820
  # - either exist or not exist on all nodes of that category (mc, vm all)
3821
  files_opt = set([
3822
    constants.RAPI_USERS_FILE,
3823
    ])
3824

    
3825
  # Files which should only be on master candidates
3826
  files_mc = set()
3827

    
3828
  if not redist:
3829
    files_mc.add(constants.CLUSTER_CONF_FILE)
3830

    
3831
    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3832
    # replication
3833
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
3834

    
3835
  # Files which should only be on VM-capable nodes
3836
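  # GetAncillaryFiles() returns a pair of file lists; the first is used for
  # vm_capable nodes, the second is additionally treated as optional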
  files_vm = set(filename
3837
    for hv_name in cluster.enabled_hypervisors
3838
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3839

    
3840
  files_opt |= set(filename
3841
    for hv_name in cluster.enabled_hypervisors
3842
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3843

    
3844
  # Filenames in each category must be unique
3845
  all_files_set = files_all | files_mc | files_vm
3846
  assert (len(all_files_set) ==
3847
          sum(map(len, [files_all, files_mc, files_vm]))), \
3848
         "Found file listed in more than one file list"
3849

    
3850
  # Optional files must be present in one other category
3851
  assert all_files_set.issuperset(files_opt), \
3852
         "Optional file not in a different required list"
3853

    
3854
  return (files_all, files_opt, files_mc, files_vm)
3855

    
3856

    
3857
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3858
  """Distribute additional files which are part of the cluster configuration.
3859

3860
  ConfigWriter takes care of distributing the config and ssconf files, but
3861
  there are more files which should be distributed to all nodes. This function
3862
  makes sure those are copied.
3863

3864
  @param lu: calling logical unit
3865
  @param additional_nodes: list of nodes not in the config to distribute to
3866
  @type additional_vm: boolean
3867
  @param additional_vm: whether the additional nodes are vm-capable or not
3868

3869
  """
3870
  # Gather target nodes
3871
  cluster = lu.cfg.GetClusterInfo()
3872
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3873

    
3874
  online_nodes = lu.cfg.GetOnlineNodeList()
3875
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3876

    
3877
  if additional_nodes is not None:
3878
    online_nodes.extend(additional_nodes)
3879
    if additional_vm:
3880
      vm_nodes.extend(additional_nodes)
3881

    
3882
  # Never distribute to master node
3883
  for nodelist in [online_nodes, vm_nodes]:
3884
    if master_info.name in nodelist:
3885
      nodelist.remove(master_info.name)
3886

    
3887
  # Gather file lists
3888
  (files_all, _, files_mc, files_vm) = \
3889
    _ComputeAncillaryFiles(cluster, True)
3890

    
3891
  # Never re-distribute configuration file from here
3892
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3893
              constants.CLUSTER_CONF_FILE in files_vm)
3894
  assert not files_mc, "Master candidates not handled in this function"
3895

    
3896
  filemap = [
3897
    (online_nodes, files_all),
3898
    (vm_nodes, files_vm),
3899
    ]
3900

    
3901
  # Upload the files
3902
  for (node_list, files) in filemap:
3903
    for fname in files:
3904
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    self.rpc.call_node_activate_master_ip(master_params.name,
                                          master_params)


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    self.rpc.call_node_deactivate_master_ip(master_params.name, master_params)

    
3954
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3955
  """Sleep and poll for an instance's disk to sync.
3956

3957
  """
3958
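  # nothing to wait for without disks or with an explicitly empty disk list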
  if not instance.disks or disks is not None and not disks:
3959
    return True
3960

    
3961
  disks = _ExpandCheckDisks(instance, disks)
3962

    
3963
  if not oneshot:
3964
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3965

    
3966
  node = instance.primary_node
3967

    
3968
  for dev in disks:
3969
    lu.cfg.SetDiskID(dev, node)
3970

    
3971
  # TODO: Convert to utils.Retry
3972

    
3973
  retries = 0
3974
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3975
  while True:
3976
    max_time = 0
3977
    done = True
3978
    cumul_degraded = False
3979
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3980
    msg = rstats.fail_msg
3981
    if msg:
3982
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3983
      retries += 1
3984
      if retries >= 10:
3985
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3986
                                 " aborting." % node)
3987
      time.sleep(6)
3988
      continue
3989
    rstats = rstats.payload
3990
    retries = 0
3991
    for i, mstat in enumerate(rstats):
3992
      if mstat is None:
3993
        lu.LogWarning("Can't compute data for node %s/%s",
3994
                           node, disks[i].iv_name)
3995
        continue
3996

    
3997
      cumul_degraded = (cumul_degraded or
3998
                        (mstat.is_degraded and mstat.sync_percent is None))
3999
      if mstat.sync_percent is not None:
4000
        done = False
4001
        if mstat.estimated_time is not None:
4002
          rem_time = ("%s remaining (estimated)" %
4003
                      utils.FormatSeconds(mstat.estimated_time))
4004
          max_time = mstat.estimated_time
4005
        else:
4006
          rem_time = "no time estimate"
4007
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4008
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
4009

    
4010
    # if we're done but degraded, let's do a few small retries, to
4011
    # make sure we see a stable and not transient situation; therefore
4012
    # we force restart of the loop
4013
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
4014
      logging.info("Degraded disks found, %d retries left", degr_retries)
4015
      degr_retries -= 1
4016
      time.sleep(1)
4017
      continue
4018

    
4019
    if done or oneshot:
4020
      break
4021

    
4022
    time.sleep(min(60, max_time))
4023

    
4024
  if done:
4025
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4026
  return not cumul_degraded
4027

    
4028

    
4029
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4030
  """Check that mirrors are not degraded.
4031

4032
  The ldisk parameter, if True, will change the test from the
4033
  is_degraded attribute (which represents overall non-ok status for
4034
  the device(s)) to the ldisk (representing the local storage status).
4035

4036
  """
4037
  lu.cfg.SetDiskID(dev, node)
4038

    
4039
  result = True
4040

    
4041
  if on_primary or dev.AssembleOnSecondary():
4042
    rstats = lu.rpc.call_blockdev_find(node, dev)
4043
    msg = rstats.fail_msg
4044
    if msg:
4045
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4046
      result = False
4047
    elif not rstats.payload:
4048
      lu.LogWarning("Can't find disk on node %s", node)
4049
      result = False
4050
    else:
4051
      if ldisk:
4052
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4053
      else:
4054
        result = result and not rstats.payload.is_degraded
4055

    
4056
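  # a mirrored device is consistent only if all of its children are, too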
  if dev.children:
4057
    for child in dev.children:
4058
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4059

    
4060
  return result
4061

    
4062

    
4063
class LUOobCommand(NoHooksLU):
4064
  """Logical unit for OOB handling.
4065

4066
  """
4067
  REQ_BGL = False
4068
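  # out-of-band commands that are never run on the master node itself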
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4069

    
4070
  def ExpandNames(self):
4071
    """Gather locks we need.
4072

4073
    """
4074
    if self.op.node_names:
4075
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4076
      lock_names = self.op.node_names
4077
    else:
4078
      lock_names = locking.ALL_SET
4079

    
4080
    self.needed_locks = {
4081
      locking.LEVEL_NODE: lock_names,
4082
      }
4083

    
4084
  def CheckPrereq(self):
4085
    """Check prerequisites.
4086

4087
    This checks:
4088
     - the node exists in the configuration
4089
     - OOB is supported
4090

4091
    Any errors are signaled by raising errors.OpPrereqError.
4092

4093
    """
4094
    self.nodes = []
4095
    self.master_node = self.cfg.GetMasterNode()
4096

    
4097
    assert self.op.power_delay >= 0.0
4098

    
4099
    if self.op.node_names:
4100
      if (self.op.command in self._SKIP_MASTER and
4101
          self.master_node in self.op.node_names):
4102
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4103
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4104

    
4105
        if master_oob_handler:
4106
          additional_text = ("run '%s %s %s' if you want to operate on the"
4107
                             " master regardless") % (master_oob_handler,
4108
                                                      self.op.command,
4109
                                                      self.master_node)
4110
        else:
4111
          additional_text = "it does not support out-of-band operations"
4112

    
4113
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4114
                                    " allowed for %s; %s") %
4115
                                   (self.master_node, self.op.command,
4116
                                    additional_text), errors.ECODE_INVAL)
4117
    else:
4118
      self.op.node_names = self.cfg.GetNodeList()
4119
      if self.op.command in self._SKIP_MASTER:
4120
        self.op.node_names.remove(self.master_node)
4121

    
4122
    if self.op.command in self._SKIP_MASTER:
4123
      assert self.master_node not in self.op.node_names
4124

    
4125
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4126
      if node is None:
4127
        raise errors.OpPrereqError("Node %s not found" % node_name,
4128
                                   errors.ECODE_NOENT)
4129
      else:
4130
        self.nodes.append(node)
4131

    
4132
      if (not self.op.ignore_status and
4133
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4134
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4135
                                    " not marked offline") % node_name,
4136
                                   errors.ECODE_STATE)
4137

    
4138
  def Exec(self, feedback_fn):
4139
    """Execute OOB and return result if we expect any.
4140

4141
    """
4142
    master_node = self.master_node
4143
    ret = []
4144

    
4145
    for idx, node in enumerate(utils.NiceSort(self.nodes,
4146
                                              key=lambda node: node.name)):
4147
      node_entry = [(constants.RS_NORMAL, node.name)]
4148
      ret.append(node_entry)
4149

    
4150
      oob_program = _SupportsOob(self.cfg, node)
4151

    
4152
      if not oob_program:
4153
        node_entry.append((constants.RS_UNAVAIL, None))
4154
        continue
4155

    
4156
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
4157
                   self.op.command, oob_program, node.name)
4158
      result = self.rpc.call_run_oob(master_node, oob_program,
4159
                                     self.op.command, node.name,
4160
                                     self.op.timeout)
4161

    
4162
      if result.fail_msg:
4163
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4164
                        node.name, result.fail_msg)
4165
        node_entry.append((constants.RS_NODATA, None))
4166
      else:
4167
        try:
4168
          self._CheckPayload(result)
4169
        except errors.OpExecError, err:
4170
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
4171
                          node.name, err)
4172
          node_entry.append((constants.RS_NODATA, None))
4173
        else:
4174
          if self.op.command == constants.OOB_HEALTH:
4175
            # For health we should log important events
4176
            for item, status in result.payload:
4177
              if status in [constants.OOB_STATUS_WARNING,
4178
                            constants.OOB_STATUS_CRITICAL]:
4179
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
4180
                                item, node.name, status)
4181

    
4182
          if self.op.command == constants.OOB_POWER_ON:
4183
            node.powered = True
4184
          elif self.op.command == constants.OOB_POWER_OFF:
4185
            node.powered = False
4186
          elif self.op.command == constants.OOB_POWER_STATUS:
4187
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4188
            if powered != node.powered:
4189
              logging.warning(("Recorded power state (%s) of node '%s' does not"
4190
                               " match actual power state (%s)"), node.powered,
4191
                              node.name, powered)
4192

    
4193
          # For configuration changing commands we should update the node
4194
          if self.op.command in (constants.OOB_POWER_ON,
4195
                                 constants.OOB_POWER_OFF):
4196
            self.cfg.Update(node, feedback_fn)
4197

    
4198
          node_entry.append((constants.RS_NORMAL, result.payload))
4199

    
4200
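          # stagger power-on between nodes by the requested power delay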
          if (self.op.command == constants.OOB_POWER_ON and
4201
              idx < len(self.nodes) - 1):
4202
            time.sleep(self.op.power_delay)
4203

    
4204
    return ret
4205

    
4206
  def _CheckPayload(self, result):
4207
    """Checks if the payload is valid.
4208

4209
    @param result: RPC result
4210
    @raises errors.OpExecError: If payload is not valid
4211

4212
    """
4213
    errs = []
4214
    if self.op.command == constants.OOB_HEALTH:
4215
      if not isinstance(result.payload, list):
4216
        errs.append("command 'health' is expected to return a list but got %s" %
4217
                    type(result.payload))
4218
      else:
4219
        for item, status in result.payload:
4220
          if status not in constants.OOB_STATUSES:
4221
            errs.append("health item '%s' has invalid status '%s'" %
4222
                        (item, status))
4223

    
4224
    if self.op.command == constants.OOB_POWER_STATUS:
4225
      if not isinstance(result.payload, dict):
4226
        errs.append("power-status is expected to return a dict but got %s" %
4227
                    type(result.payload))
4228

    
4229
    if self.op.command in [
4230
        constants.OOB_POWER_ON,
4231
        constants.OOB_POWER_OFF,
4232
        constants.OOB_POWER_CYCLE,
4233
        ]:
4234
      if result.payload is not None:
4235
        errs.append("%s is expected to not return payload but got '%s'" %
4236
                    (self.op.command, result.payload))
4237

    
4238
    if errs:
4239
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4240
                               utils.CommaJoin(errs))
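  # Illustrative summary of the payload contract checked above; the concrete
  # item names and values are invented for illustration and are not taken
  # from any real node:
  #   OOB_HEALTH        -> a list of (item, status) pairs, e.g.
  #                        [("disk0", constants.OOB_STATUS_WARNING), ...]
  #   OOB_POWER_STATUS  -> a dict such as
  #                        {constants.OOB_POWER_STATUS_POWERED: True}
  #   OOB_POWER_ON/OFF/CYCLE -> None (no payload is expected)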


class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
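  # Sketch of the narrowing performed above (the variant names are made up for
  # illustration): if one node reports variants set(["10.04", "12.04"]) and
  # another reports set(["12.04", "14.04"]), only the common "12.04" survives
  # the intersection_update() calls; parameters and api_versions are reduced
  # in the same way, so the OS query only shows values valid on every node.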


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter
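  # Rough illustration (not captured from a real cluster): when the caller
  # asks only for fields=["name", "variants"], the status part built above
  # becomes
  #   [qlang.OP_AND,
  #    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
  #    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
  #    [qlang.OP_TRUE, "valid"]]
  # so hidden, blacklisted and invalid OSes stay out of the legacy listing
  # unless those fields are requested explicitly.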

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
4497

    
4498

    
4499
class _NodeQuery(_QueryBase):
4500
  FIELDS = query.NODE_FIELDS
4501

    
4502
  def ExpandNames(self, lu):
4503
    lu.needed_locks = {}
4504
    lu.share_locks = _ShareAll()
4505

    
4506
    if self.names:
4507
      self.wanted = _GetWantedNodes(lu, self.names)
4508
    else:
4509
      self.wanted = locking.ALL_SET
4510

    
4511
    self.do_locking = (self.use_locking and
4512
                       query.NQ_LIVE in self.requested_data)
4513

    
4514
    if self.do_locking:
4515
      # If any non-static field is requested we need to lock the nodes
4516
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4517

    
4518
  def DeclareLocks(self, lu, level):
4519
    pass
4520

    
4521
  def _GetQueryData(self, lu):
4522
    """Computes the list of nodes and their attributes.
4523

4524
    """
4525
    all_info = lu.cfg.GetAllNodesInfo()
4526

    
4527
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4528

    
4529
    # Gather data as requested
4530
    if query.NQ_LIVE in self.requested_data:
4531
      # filter out non-vm_capable nodes
4532
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4533

    
4534
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4535
                                        lu.cfg.GetHypervisorType())
4536
      live_data = dict((name, nresult.payload)
4537
                       for (name, nresult) in node_data.items()
4538
                       if not nresult.fail_msg and nresult.payload)
4539
    else:
4540
      live_data = None
4541

    
4542
    if query.NQ_INST in self.requested_data:
4543
      node_to_primary = dict([(name, set()) for name in nodenames])
4544
      node_to_secondary = dict([(name, set()) for name in nodenames])
4545

    
4546
      inst_data = lu.cfg.GetAllInstancesInfo()
4547

    
4548
      for inst in inst_data.values():
4549
        if inst.primary_node in node_to_primary:
4550
          node_to_primary[inst.primary_node].add(inst.name)
4551
        for secnode in inst.secondary_nodes:
4552
          if secnode in node_to_secondary:
4553
            node_to_secondary[secnode].add(inst.name)
4554
    else:
4555
      node_to_primary = None
4556
      node_to_secondary = None
4557

    
4558
    if query.NQ_OOB in self.requested_data:
4559
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4560
                         for name, node in all_info.iteritems())
4561
    else:
4562
      oob_support = None
4563

    
4564
    if query.NQ_GROUP in self.requested_data:
4565
      groups = lu.cfg.GetAllNodeGroupsInfo()
4566
    else:
4567
      groups = {}
4568

    
4569
    return query.NodeQueryData([all_info[name] for name in nodenames],
4570
                               live_data, lu.cfg.GetMasterNode(),
4571
                               node_to_primary, node_to_secondary, groups,
4572
                               oob_support, lu.cfg.GetClusterInfo())
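  # Shape of the instance mappings computed above (node and instance names are
  # hypothetical): for an instance "inst1" whose primary node is "node1" and
  # whose only secondary node is "node2", the result is
  #   node_to_primary   = {"node1": set(["inst1"]), "node2": set()}
  #   node_to_secondary = {"node1": set(), "node2": set(["inst1"])}
  # and both dictionaries are then handed to query.NodeQueryData.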
4573

    
4574

    
4575
class LUNodeQuery(NoHooksLU):
4576
  """Logical unit for querying nodes.
4577

4578
  """
4579
  # pylint: disable=W0142
4580
  REQ_BGL = False
4581

    
4582
  def CheckArguments(self):
4583
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4584
                         self.op.output_fields, self.op.use_locking)
4585

    
4586
  def ExpandNames(self):
4587
    self.nq.ExpandNames(self)
4588

    
4589
  def Exec(self, feedback_fn):
4590
    return self.nq.OldStyleQuery(self)
4591

    
4592

    
4593
class LUNodeQueryvols(NoHooksLU):
4594
  """Logical unit for getting volumes on node(s).
4595

4596
  """
4597
  REQ_BGL = False
4598
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4599
  _FIELDS_STATIC = utils.FieldSet("node")
4600

    
4601
  def CheckArguments(self):
4602
    _CheckOutputFields(static=self._FIELDS_STATIC,
4603
                       dynamic=self._FIELDS_DYNAMIC,
4604
                       selected=self.op.output_fields)
4605

    
4606
  def ExpandNames(self):
4607
    self.needed_locks = {}
4608
    self.share_locks[locking.LEVEL_NODE] = 1
4609
    if not self.op.nodes:
4610
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4611
    else:
4612
      self.needed_locks[locking.LEVEL_NODE] = \
4613
        _GetWantedNodes(self, self.op.nodes)
4614

    
4615
  def Exec(self, feedback_fn):
4616
    """Computes the list of nodes and their attributes.
4617

4618
    """
4619
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4620
    volumes = self.rpc.call_node_volumes(nodenames)
4621

    
4622
    ilist = self.cfg.GetAllInstancesInfo()
4623
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4624

    
4625
    output = []
4626
    for node in nodenames:
4627
      nresult = volumes[node]
4628
      if nresult.offline:
4629
        continue
4630
      msg = nresult.fail_msg
4631
      if msg:
4632
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4633
        continue
4634

    
4635
      node_vols = sorted(nresult.payload,
4636
                         key=operator.itemgetter("dev"))
4637

    
4638
      for vol in node_vols:
4639
        node_output = []
4640
        for field in self.op.output_fields:
4641
          if field == "node":
4642
            val = node
4643
          elif field == "phys":
4644
            val = vol["dev"]
4645
          elif field == "vg":
4646
            val = vol["vg"]
4647
          elif field == "name":
4648
            val = vol["name"]
4649
          elif field == "size":
4650
            val = int(float(vol["size"]))
4651
          elif field == "instance":
4652
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4653
          else:
4654
            raise errors.ParameterError(field)
4655
          node_output.append(str(val))
4656

    
4657
        output.append(node_output)
4658

    
4659
    return output
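  # Example of one returned row (all values invented for illustration): with
  # output_fields = ["node", "vg", "name", "size", "instance"], an entry of
  # the result could look like
  #   ["node1.example.com", "xenvg", "vol1", "10240", "instance1"]
  # i.e. one list of stringified values per logical volume, in field order.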


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
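  # Note on the field handling above: only the backend storage fields are
  # requested via call_storage_list; constants.SF_NODE and constants.SF_TYPE
  # are filled in locally from the loop variable and self.op.storage_type,
  # and constants.SF_NAME is always fetched so the rows can be sorted by name
  # even when the caller did not ask for it.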
4743

    
4744

    
4745
class _InstanceQuery(_QueryBase):
4746
  FIELDS = query.INSTANCE_FIELDS
4747

    
4748
  def ExpandNames(self, lu):
4749
    lu.needed_locks = {}
4750
    lu.share_locks = _ShareAll()
4751

    
4752
    if self.names:
4753
      self.wanted = _GetWantedInstances(lu, self.names)
4754
    else:
4755
      self.wanted = locking.ALL_SET
4756

    
4757
    self.do_locking = (self.use_locking and
4758
                       query.IQ_LIVE in self.requested_data)
4759
    if self.do_locking:
4760
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4761
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4762
      lu.needed_locks[locking.LEVEL_NODE] = []
4763
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4764

    
4765
    self.do_grouplocks = (self.do_locking and
4766
                          query.IQ_NODES in self.requested_data)
4767

    
4768
  def DeclareLocks(self, lu, level):
4769
    if self.do_locking:
4770
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4771
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4772

    
4773
        # Lock all groups used by instances optimistically; this requires going
4774
        # via the node before it's locked, requiring verification later on
4775
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4776
          set(group_uuid
4777
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4778
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4779
      elif level == locking.LEVEL_NODE:
4780
        lu._LockInstancesNodes() # pylint: disable=W0212
4781

    
4782
  @staticmethod
4783
  def _CheckGroupLocks(lu):
4784
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4785
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4786

    
4787
    # Check if node groups for locked instances are still correct
4788
    for instance_name in owned_instances:
4789
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4790

    
4791
  def _GetQueryData(self, lu):
4792
    """Computes the list of instances and their attributes.
4793

4794
    """
4795
    if self.do_grouplocks:
4796
      self._CheckGroupLocks(lu)
4797

    
4798
    cluster = lu.cfg.GetClusterInfo()
4799
    all_info = lu.cfg.GetAllInstancesInfo()
4800

    
4801
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4802

    
4803
    instance_list = [all_info[name] for name in instance_names]
4804
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4805
                                        for inst in instance_list)))
4806
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4807
    bad_nodes = []
4808
    offline_nodes = []
4809
    wrongnode_inst = set()
4810

    
4811
    # Gather data as requested
4812
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4813
      live_data = {}
4814
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4815
      for name in nodes:
4816
        result = node_data[name]
4817
        if result.offline:
4818
          # offline nodes will be in both lists
4819
          assert result.fail_msg
4820
          offline_nodes.append(name)
4821
        if result.fail_msg:
4822
          bad_nodes.append(name)
4823
        elif result.payload:
4824
          for inst in result.payload:
4825
            if inst in all_info:
4826
              if all_info[inst].primary_node == name:
4827
                live_data.update(result.payload)
4828
              else:
4829
                wrongnode_inst.add(inst)
4830
            else:
4831
              # orphan instance; we don't list it here as we don't
4832
              # handle this case yet in the output of instance listing
4833
              logging.warning("Orphan instance '%s' found on node %s",
4834
                              inst, name)
4835
        # else no instance is alive
4836
    else:
4837
      live_data = {}
4838

    
4839
    if query.IQ_DISKUSAGE in self.requested_data:
4840
      disk_usage = dict((inst.name,
4841
                         _ComputeDiskSize(inst.disk_template,
4842
                                          [{constants.IDISK_SIZE: disk.size}
4843
                                           for disk in inst.disks]))
4844
                        for inst in instance_list)
4845
    else:
4846
      disk_usage = None
4847

    
4848
    if query.IQ_CONSOLE in self.requested_data:
4849
      consinfo = {}
4850
      for inst in instance_list:
4851
        if inst.name in live_data:
4852
          # Instance is running
4853
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4854
        else:
4855
          consinfo[inst.name] = None
4856
      assert set(consinfo.keys()) == set(instance_names)
4857
    else:
4858
      consinfo = None
4859

    
4860
    if query.IQ_NODES in self.requested_data:
4861
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4862
                                            instance_list)))
4863
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4864
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4865
                    for uuid in set(map(operator.attrgetter("group"),
4866
                                        nodes.values())))
4867
    else:
4868
      nodes = None
4869
      groups = None
4870

    
4871
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4872
                                   disk_usage, offline_nodes, bad_nodes,
4873
                                   live_data, wrongnode_inst, consinfo,
4874
                                   nodes, groups)
4875

    
4876

    
4877
class LUQuery(NoHooksLU):
4878
  """Query for resources/items of a certain kind.
4879

4880
  """
4881
  # pylint: disable=W0142
4882
  REQ_BGL = False
4883

    
4884
  def CheckArguments(self):
4885
    qcls = _GetQueryImplementation(self.op.what)
4886

    
4887
    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4888

    
4889
  def ExpandNames(self):
4890
    self.impl.ExpandNames(self)
4891

    
4892
  def DeclareLocks(self, level):
4893
    self.impl.DeclareLocks(self, level)
4894

    
4895
  def Exec(self, feedback_fn):
4896
    return self.impl.NewStyleQuery(self)
4897

    
4898

    
4899
class LUQueryFields(NoHooksLU):
4900
  """Query for resources/items of a certain kind.
4901

4902
  """
4903
  # pylint: disable=W0142
4904
  REQ_BGL = False
4905

    
4906
  def CheckArguments(self):
4907
    self.qcls = _GetQueryImplementation(self.op.what)
4908

    
4909
  def ExpandNames(self):
4910
    self.needed_locks = {}
4911

    
4912
  def Exec(self, feedback_fn):
4913
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4914

    
4915

    
4916
class LUNodeModifyStorage(NoHooksLU):
4917
  """Logical unit for modifying a storage volume on a node.
4918

4919
  """
4920
  REQ_BGL = False
4921

    
4922
  def CheckArguments(self):
4923
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4924

    
4925
    storage_type = self.op.storage_type
4926

    
4927
    try:
4928
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4929
    except KeyError:
4930
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4931
                                 " modified" % storage_type,
4932
                                 errors.ECODE_INVAL)
4933

    
4934
    diff = set(self.op.changes.keys()) - modifiable
4935
    if diff:
4936
      raise errors.OpPrereqError("The following fields can not be modified for"
4937
                                 " storage units of type '%s': %r" %
4938
                                 (storage_type, list(diff)),
4939
                                 errors.ECODE_INVAL)
4940

    
4941
  def ExpandNames(self):
4942
    self.needed_locks = {
4943
      locking.LEVEL_NODE: self.op.node_name,
4944
      }
4945

    
4946
  def Exec(self, feedback_fn):
4947
    """Computes the list of nodes and their attributes.
4948

4949
    """
4950
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4951
    result = self.rpc.call_storage_modify(self.op.node_name,
4952
                                          self.op.storage_type, st_args,
4953
                                          self.op.name, self.op.changes)
4954
    result.Raise("Failed to modify storage unit '%s' on %s" %
4955
                 (self.op.name, self.op.node_name))
4956

    
4957

    
4958
class LUNodeAdd(LogicalUnit):
4959
  """Logical unit for adding node to the cluster.
4960

4961
  """
4962
  HPATH = "node-add"
4963
  HTYPE = constants.HTYPE_NODE
4964
  _NFLAGS = ["master_capable", "vm_capable"]
4965

    
4966
  def CheckArguments(self):
4967
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4968
    # validate/normalize the node name
4969
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4970
                                         family=self.primary_ip_family)
4971
    self.op.node_name = self.hostname.name
4972

    
4973
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4974
      raise errors.OpPrereqError("Cannot readd the master node",
4975
                                 errors.ECODE_STATE)
4976

    
4977
    if self.op.readd and self.op.group:
4978
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4979
                                 " being readded", errors.ECODE_INVAL)
4980

    
4981
  def BuildHooksEnv(self):
4982
    """Build hooks env.
4983

4984
    This will run on all nodes before, and on all nodes + the new node after.
4985

4986
    """
4987
    return {
4988
      "OP_TARGET": self.op.node_name,
4989
      "NODE_NAME": self.op.node_name,
4990
      "NODE_PIP": self.op.primary_ip,
4991
      "NODE_SIP": self.op.secondary_ip,
4992
      "MASTER_CAPABLE": str(self.op.master_capable),
4993
      "VM_CAPABLE": str(self.op.vm_capable),
4994
      }
4995

    
4996
  def BuildHooksNodes(self):
4997
    """Build hooks nodes.
4998

4999
    """
5000
    # Exclude added node
5001
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5002
    post_nodes = pre_nodes + [self.op.node_name, ]
5003

    
5004
    return (pre_nodes, post_nodes)
5005

    
5006
  def CheckPrereq(self):
5007
    """Check prerequisites.
5008

5009
    This checks:
5010
     - the new node is not already in the config
5011
     - it is resolvable
5012
     - its parameters (single/dual homed) matches the cluster
5013

5014
    Any errors are signaled by raising errors.OpPrereqError.
5015

5016
    """
5017
    cfg = self.cfg
5018
    hostname = self.hostname
5019
    node = hostname.name
5020
    primary_ip = self.op.primary_ip = hostname.ip
5021
    if self.op.secondary_ip is None:
5022
      if self.primary_ip_family == netutils.IP6Address.family:
5023
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5024
                                   " IPv4 address must be given as secondary",
5025
                                   errors.ECODE_INVAL)
5026
      self.op.secondary_ip = primary_ip
5027

    
5028
    secondary_ip = self.op.secondary_ip
5029
    if not netutils.IP4Address.IsValid(secondary_ip):
5030
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5031
                                 " address" % secondary_ip, errors.ECODE_INVAL)
5032

    
5033
    node_list = cfg.GetNodeList()
5034
    if not self.op.readd and node in node_list:
5035
      raise errors.OpPrereqError("Node %s is already in the configuration" %
5036
                                 node, errors.ECODE_EXISTS)
5037
    elif self.op.readd and node not in node_list:
5038
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5039
                                 errors.ECODE_NOENT)
5040

    
5041
    self.changed_primary_ip = False
5042

    
5043
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5044
      if self.op.readd and node == existing_node_name:
5045
        if existing_node.secondary_ip != secondary_ip:
5046
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
5047
                                     " address configuration as before",
5048
                                     errors.ECODE_INVAL)
5049
        if existing_node.primary_ip != primary_ip:
5050
          self.changed_primary_ip = True
5051

    
5052
        continue
5053

    
5054
      if (existing_node.primary_ip == primary_ip or
5055
          existing_node.secondary_ip == primary_ip or
5056
          existing_node.primary_ip == secondary_ip or
5057
          existing_node.secondary_ip == secondary_ip):
5058
        raise errors.OpPrereqError("New node ip address(es) conflict with"
5059
                                   " existing node %s" % existing_node.name,
5060
                                   errors.ECODE_NOTUNIQUE)
5061

    
5062
    # After this 'if' block, None is no longer a valid value for the
5063
    # _capable op attributes
5064
    if self.op.readd:
5065
      old_node = self.cfg.GetNodeInfo(node)
5066
      assert old_node is not None, "Can't retrieve locked node %s" % node
5067
      for attr in self._NFLAGS:
5068
        if getattr(self.op, attr) is None:
5069
          setattr(self.op, attr, getattr(old_node, attr))
5070
    else:
5071
      for attr in self._NFLAGS:
5072
        if getattr(self.op, attr) is None:
5073
          setattr(self.op, attr, True)
5074

    
5075
    if self.op.readd and not self.op.vm_capable:
5076
      pri, sec = cfg.GetNodeInstances(node)
5077
      if pri or sec:
5078
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5079
                                   " flag set to false, but it already holds"
5080
                                   " instances" % node,
5081
                                   errors.ECODE_STATE)
5082

    
5083
    # check that the type of the node (single versus dual homed) is the
5084
    # same as for the master
5085
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5086
    master_singlehomed = myself.secondary_ip == myself.primary_ip
5087
    newbie_singlehomed = secondary_ip == primary_ip
5088
    if master_singlehomed != newbie_singlehomed:
5089
      if master_singlehomed:
5090
        raise errors.OpPrereqError("The master has no secondary ip but the"
5091
                                   " new node has one",
5092
                                   errors.ECODE_INVAL)
5093
      else:
5094
        raise errors.OpPrereqError("The master has a secondary ip but the"
5095
                                   " new node doesn't have one",
5096
                                   errors.ECODE_INVAL)
5097

    
5098
    # checks reachability
5099
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5100
      raise errors.OpPrereqError("Node not reachable by ping",
5101
                                 errors.ECODE_ENVIRON)
5102

    
5103
    if not newbie_singlehomed:
5104
      # check reachability from my secondary ip to newbie's secondary ip
5105
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5106
                           source=myself.secondary_ip):
5107
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5108
                                   " based ping to node daemon port",
5109
                                   errors.ECODE_ENVIRON)
5110

    
5111
    if self.op.readd:
5112
      exceptions = [node]
5113
    else:
5114
      exceptions = []
5115

    
5116
    if self.op.master_capable:
5117
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5118
    else:
5119
      self.master_candidate = False
5120

    
5121
    if self.op.readd:
5122
      self.new_node = old_node
5123
    else:
5124
      node_group = cfg.LookupNodeGroup(self.op.group)
5125
      self.new_node = objects.Node(name=node,
5126
                                   primary_ip=primary_ip,
5127
                                   secondary_ip=secondary_ip,
5128
                                   master_candidate=self.master_candidate,
5129
                                   offline=False, drained=False,
5130
                                   group=node_group)
5131

    
5132
    if self.op.ndparams:
5133
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5134

    
5135
  def Exec(self, feedback_fn):
5136
    """Adds the new node to the cluster.
5137

5138
    """
5139
    new_node = self.new_node
5140
    node = new_node.name
5141

    
5142
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5143
      "Not owning BGL"
5144

    
5145
    # We adding a new node so we assume it's powered
5146
    new_node.powered = True
5147

    
5148
    # for re-adds, reset the offline/drained/master-candidate flags;
5149
    # we need to reset here, otherwise offline would prevent RPC calls
5150
    # later in the procedure; this also means that if the re-add
5151
    # fails, we are left with a non-offlined, broken node
5152
    if self.op.readd:
5153
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5154
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5155
      # if we demote the node, we do cleanup later in the procedure
5156
      new_node.master_candidate = self.master_candidate
5157
      if self.changed_primary_ip:
5158
        new_node.primary_ip = self.op.primary_ip
5159

    
5160
    # copy the master/vm_capable flags
5161
    for attr in self._NFLAGS:
5162
      setattr(new_node, attr, getattr(self.op, attr))
5163

    
5164
    # notify the user about any possible mc promotion
5165
    if new_node.master_candidate:
5166
      self.LogInfo("Node will be a master candidate")
5167

    
5168
    if self.op.ndparams:
5169
      new_node.ndparams = self.op.ndparams
5170
    else:
5171
      new_node.ndparams = {}
5172

    
5173
    # check connectivity
5174
    result = self.rpc.call_version([node])[node]
5175
    result.Raise("Can't get version information from node %s" % node)
5176
    if constants.PROTOCOL_VERSION == result.payload:
5177
      logging.info("Communication to node %s fine, sw version %s match",
5178
                   node, result.payload)
5179
    else:
5180
      raise errors.OpExecError("Version mismatch master version %s,"
5181
                               " node version %s" %
5182
                               (constants.PROTOCOL_VERSION, result.payload))
5183

    
5184
    # Add node to our /etc/hosts, and add key to known_hosts
5185
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5186
      master_node = self.cfg.GetMasterNode()
5187
      result = self.rpc.call_etc_hosts_modify(master_node,
5188
                                              constants.ETC_HOSTS_ADD,
5189
                                              self.hostname.name,
5190
                                              self.hostname.ip)
5191
      result.Raise("Can't update hosts file with new host data")
5192

    
5193
    if new_node.secondary_ip != new_node.primary_ip:
5194
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5195
                               False)
5196

    
5197
    node_verify_list = [self.cfg.GetMasterNode()]
5198
    node_verify_param = {
5199
      constants.NV_NODELIST: ([node], {}),
5200
      # TODO: do a node-net-test as well?
5201
    }
5202

    
5203
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5204
                                       self.cfg.GetClusterName())
5205
    for verifier in node_verify_list:
5206
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5207
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5208
      if nl_payload:
5209
        for failed in nl_payload:
5210
          feedback_fn("ssh/hostname verification failed"
5211
                      " (checking from %s): %s" %
5212
                      (verifier, nl_payload[failed]))
5213
        raise errors.OpExecError("ssh/hostname verification failed")
5214

    
5215
    if self.op.readd:
5216
      _RedistributeAncillaryFiles(self)
5217
      self.context.ReaddNode(new_node)
5218
      # make sure we redistribute the config
5219
      self.cfg.Update(new_node, feedback_fn)
5220
      # and make sure the new node will not have old files around
5221
      if not new_node.master_candidate:
5222
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5223
        msg = result.fail_msg
5224
        if msg:
5225
          self.LogWarning("Node failed to demote itself from master"
5226
                          " candidate status: %s" % msg)
5227
    else:
5228
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5229
                                  additional_vm=self.op.vm_capable)
5230
      self.context.AddNode(new_node, self.proc.GetECId())
5231

    
5232

    
5233
class LUNodeSetParams(LogicalUnit):
5234
  """Modifies the parameters of a node.
5235

5236
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5237
      to the node role (as _ROLE_*)
5238
  @cvar _R2F: a dictionary from node role to tuples of flags
5239
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5240

5241
  """
5242
  HPATH = "node-modify"
5243
  HTYPE = constants.HTYPE_NODE
5244
  REQ_BGL = False
5245
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5246
  _F2R = {
5247
    (True, False, False): _ROLE_CANDIDATE,
5248
    (False, True, False): _ROLE_DRAINED,
5249
    (False, False, True): _ROLE_OFFLINE,
5250
    (False, False, False): _ROLE_REGULAR,
5251
    }
5252
  _R2F = dict((v, k) for k, v in _F2R.items())
5253
  _FLAGS = ["master_candidate", "drained", "offline"]
5254

    
5255
  def CheckArguments(self):
5256
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5257
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5258
                self.op.master_capable, self.op.vm_capable,
5259
                self.op.secondary_ip, self.op.ndparams]
5260
    if all_mods.count(None) == len(all_mods):
5261
      raise errors.OpPrereqError("Please pass at least one modification",
5262
                                 errors.ECODE_INVAL)
5263
    if all_mods.count(True) > 1:
5264
      raise errors.OpPrereqError("Can't set the node into more than one"
5265
                                 " state at the same time",
5266
                                 errors.ECODE_INVAL)
5267

    
5268
    # Boolean value that tells us whether we might be demoting from MC
5269
    self.might_demote = (self.op.master_candidate == False or
5270
                         self.op.offline == True or
5271
                         self.op.drained == True or
5272
                         self.op.master_capable == False)
5273

    
5274
    if self.op.secondary_ip:
5275
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5276
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5277
                                   " address" % self.op.secondary_ip,
5278
                                   errors.ECODE_INVAL)
5279

    
5280
    self.lock_all = self.op.auto_promote and self.might_demote
5281
    self.lock_instances = self.op.secondary_ip is not None
5282

    
5283
  def _InstanceFilter(self, instance):
5284
    """Filter for getting affected instances.
5285

5286
    """
5287
    return (instance.disk_template in constants.DTS_INT_MIRROR and
5288
            self.op.node_name in instance.all_nodes)
5289

    
5290
  def ExpandNames(self):
5291
    if self.lock_all:
5292
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5293
    else:
5294
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5295

    
5296
    if self.lock_instances:
5297
      self.needed_locks[locking.LEVEL_INSTANCE] = \
5298
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5299

    
5300
  def BuildHooksEnv(self):
5301
    """Build hooks env.
5302

5303
    This runs on the master node.
5304

5305
    """
5306
    return {
5307
      "OP_TARGET": self.op.node_name,
5308
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5309
      "OFFLINE": str(self.op.offline),
5310
      "DRAINED": str(self.op.drained),
5311
      "MASTER_CAPABLE": str(self.op.master_capable),
5312
      "VM_CAPABLE": str(self.op.vm_capable),
5313
      }
5314

    
5315
  def BuildHooksNodes(self):
5316
    """Build hooks nodes.
5317

5318
    """
5319
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5320
    return (nl, nl)
5321

    
5322
  def CheckPrereq(self):
5323
    """Check prerequisites.
5324

5325
    This only checks the instance list against the existing names.
5326

5327
    """
5328
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5329

    
5330
    if self.lock_instances:
5331
      affected_instances = \
5332
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5333

    
5334
      # Verify instance locks
5335
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5336
      wanted_instances = frozenset(affected_instances.keys())
5337
      if wanted_instances - owned_instances:
5338
        raise errors.OpPrereqError("Instances affected by changing node %s's"
5339
                                   " secondary IP address have changed since"
5340
                                   " locks were acquired, wanted '%s', have"
5341
                                   " '%s'; retry the operation" %
5342
                                   (self.op.node_name,
5343
                                    utils.CommaJoin(wanted_instances),
5344
                                    utils.CommaJoin(owned_instances)),
5345
                                   errors.ECODE_STATE)
5346
    else:
5347
      affected_instances = None
5348

    
5349
    if (self.op.master_candidate is not None or
5350
        self.op.drained is not None or
5351
        self.op.offline is not None):
5352
      # we can't change the master's node flags
5353
      if self.op.node_name == self.cfg.GetMasterNode():
5354
        raise errors.OpPrereqError("The master role can be changed"
5355
                                   " only via master-failover",
5356
                                   errors.ECODE_INVAL)
5357

    
5358
    if self.op.master_candidate and not node.master_capable:
5359
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5360
                                 " it a master candidate" % node.name,
5361
                                 errors.ECODE_STATE)
5362

    
5363
    if self.op.vm_capable == False:
5364
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5365
      if ipri or isec:
5366
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5367
                                   " the vm_capable flag" % node.name,
5368
                                   errors.ECODE_STATE)
5369

    
5370
    if node.master_candidate and self.might_demote and not self.lock_all:
5371
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5372
      # check if after removing the current node, we're missing master
5373
      # candidates
5374
      (mc_remaining, mc_should, _) = \
5375
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5376
      if mc_remaining < mc_should:
5377
        raise errors.OpPrereqError("Not enough master candidates, please"
5378
                                   " pass auto promote option to allow"
5379
                                   " promotion", errors.ECODE_STATE)
5380

    
5381
    self.old_flags = old_flags = (node.master_candidate,
5382
                                  node.drained, node.offline)
5383
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5384
    self.old_role = old_role = self._F2R[old_flags]
5385

    
5386
    # Check for ineffective changes
5387
    for attr in self._FLAGS:
5388
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5389
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5390
        setattr(self.op, attr, None)
5391

    
5392
    # Past this point, any flag change to False means a transition
5393
    # away from the respective state, as only real changes are kept
5394

    
5395
    # TODO: We might query the real power state if it supports OOB
5396
    if _SupportsOob(self.cfg, node):
5397
      if self.op.offline is False and not (node.powered or
5398
                                           self.op.powered == True):
5399
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5400
                                    " offline status can be reset") %
5401
                                   self.op.node_name)
5402
    elif self.op.powered is not None:
5403
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5404
                                  " as it does not support out-of-band"
5405
                                  " handling") % self.op.node_name)
5406

    
5407
    # If we're being deofflined/drained, we'll MC ourself if needed
5408
    if (self.op.drained == False or self.op.offline == False or
5409
        (self.op.master_capable and not node.master_capable)):
5410
      if _DecideSelfPromotion(self):
5411
        self.op.master_candidate = True
5412
        self.LogInfo("Auto-promoting node to master candidate")
5413

    
5414
    # If we're no longer master capable, we'll demote ourselves from MC
5415
    if self.op.master_capable == False and node.master_candidate:
5416
      self.LogInfo("Demoting from master candidate")
5417
      self.op.master_candidate = False
5418

    
5419
    # Compute new role
5420
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5421
    if self.op.master_candidate:
5422
      new_role = self._ROLE_CANDIDATE
5423
    elif self.op.drained:
5424
      new_role = self._ROLE_DRAINED
5425
    elif self.op.offline:
5426
      new_role = self._ROLE_OFFLINE
5427
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5428
      # False is still in new flags, which means we're un-setting (the
5429
      # only) True flag
5430
      new_role = self._ROLE_REGULAR
5431
    else: # no new flags, nothing, keep old role
5432
      new_role = old_role
5433

    
5434
    self.new_role = new_role
5435

    
5436
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5437
      # Trying to transition out of offline status
5438
      # TODO: Use standard RPC runner, but make sure it works when the node is
5439
      # still marked offline
5440
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5441
      if result.fail_msg:
5442
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5443
                                   " to report its version: %s" %
5444
                                   (node.name, result.fail_msg),
5445
                                   errors.ECODE_STATE)
5446
      else:
5447
        self.LogWarning("Transitioning node from offline to online state"
5448
                        " without using re-add. Please make sure the node"
5449
                        " is healthy!")
5450

    
5451
    if self.op.secondary_ip:
5452
      # Ok even without locking, because this can't be changed by any LU
5453
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5454
      master_singlehomed = master.secondary_ip == master.primary_ip
5455
      if master_singlehomed and self.op.secondary_ip:
5456
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5457
                                   " homed cluster", errors.ECODE_INVAL)
5458

    
5459
      assert not (frozenset(affected_instances) -
5460
                  self.owned_locks(locking.LEVEL_INSTANCE))
5461

    
5462
      if node.offline:
5463
        if affected_instances:
5464
          raise errors.OpPrereqError("Cannot change secondary IP address:"
5465
                                     " offline node has instances (%s)"
5466
                                     " configured to use it" %
5467
                                     utils.CommaJoin(affected_instances.keys()))
5468
      else:
5469
        # On online nodes, check that no instances are running, and that
5470
        # the node has the new ip and we can reach it.
5471
        for instance in affected_instances.values():
5472
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5473

    
5474
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5475
        if master.name != node.name:
5476
          # check reachability from master secondary ip to new secondary ip
5477
          if not netutils.TcpPing(self.op.secondary_ip,
5478
                                  constants.DEFAULT_NODED_PORT,
5479
                                  source=master.secondary_ip):
5480
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5481
                                       " based ping to node daemon port",
5482
                                       errors.ECODE_ENVIRON)
5483

    
5484
    if self.op.ndparams:
5485
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5486
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5487
      self.new_ndparams = new_ndparams
5488

    
5489
  def Exec(self, feedback_fn):
5490
    """Modifies a node.
5491

5492
    """
5493
    node = self.node
5494
    old_role = self.old_role
5495
    new_role = self.new_role
5496

    
5497
    result = []
5498

    
5499
    if self.op.ndparams:
5500
      node.ndparams = self.new_ndparams
5501

    
5502
    if self.op.powered is not None:
5503
      node.powered = self.op.powered
5504

    
5505
    for attr in ["master_capable", "vm_capable"]:
5506
      val = getattr(self.op, attr)
5507
      if val is not None:
5508
        setattr(node, attr, val)
5509
        result.append((attr, str(val)))
5510

    
5511
    if new_role != old_role:
5512
      # Tell the node to demote itself, if no longer MC and not offline
5513
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5514
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5515
        if msg:
5516
          self.LogWarning("Node failed to demote itself: %s", msg)
5517

    
5518
      new_flags = self._R2F[new_role]
5519
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5520
        if of != nf:
5521
          result.append((desc, str(nf)))
5522
      (node.master_candidate, node.drained, node.offline) = new_flags
5523

    
5524
      # we locked all nodes, we adjust the CP before updating this node
5525
      if self.lock_all:
5526
        _AdjustCandidatePool(self, [node.name])
5527

    
5528
    if self.op.secondary_ip:
5529
      node.secondary_ip = self.op.secondary_ip
5530
      result.append(("secondary_ip", self.op.secondary_ip))
5531

    
5532
    # this will trigger configuration file update, if needed
5533
    self.cfg.Update(node, feedback_fn)
5534

    
5535
    # this will trigger job queue propagation or cleanup if the mc
5536
    # flag changed
5537
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5538
      self.context.ReaddNode(node)
5539

    
5540
    return result
5541

    
5542

    
5543
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


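# A minimal, self-contained sketch of the os_hvp filtering done in
# LUClusterQuery.Exec above: only hypervisors that are enabled cluster-wide
# are reported per OS. The sample data in the comment is invented for
# illustration only.
def _ExampleFilterOsHvp(os_hvp, enabled_hypervisors):
  """Keep only the per-OS parameters of enabled hypervisors."""
  return dict((os_name,
               dict((hv_name, hv_params)
                    for (hv_name, hv_params) in hv_dict.items()
                    if hv_name in enabled_hypervisors))
              for (os_name, hv_dict) in os_hvp.items())

# Example (hypothetical values):
#   _ExampleFilterOsHvp({"debian": {"kvm": {"acpi": True}, "fake": {}}},
#                       ["kvm"])
#   => {"debian": {"kvm": {"acpi": True}}}

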
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


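# A hedged usage sketch for _AssembleInstanceDisks: callers (see
# _StartInstanceDisks and LUInstanceActivateDisks) check the boolean first
# and only then use the device list. The "lu" and "instance" parameters stand
# for objects provided by a surrounding LU; this helper is illustrative only
# and is not called anywhere in this module.
def _ExampleActivateAndReport(lu, instance):
  """Assemble an instance's disks and return printable mapping rows."""
  disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate block devices")
  # device_info is a list of (node_name, iv_name, device_path) tuples
  return ["%s:%s -> %s" % (node, iv_name, dev_path)
          for (node, iv_name, dev_path) in device_info]

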
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored (they cause the function to return False).

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


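# Illustrative helper (not called by the code above): the error-accounting
# rule inside _ShutdownInstanceDisks, factored out for clarity. An error is
# fatal on the primary node unless ignore_primary is set, and on a secondary
# node only if that node is not marked offline.
def _ExampleShutdownErrorIsFatal(node, primary_node, node_offline,
                                 ignore_primary):
  """Return True if a blockdev shutdown error should mark the result failed."""
  if node == primary_node:
    return not ignore_primary
  return not node_offline

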
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


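# Sketch of the node-info payload consumed by _CheckNodeFreeMemory: the RPC
# result payload is a dict whose "memory_free" entry must be an integer
# number of MiB. The sample payload keys and values below are assumptions
# for illustration only.
_EXAMPLE_NODEINFO_PAYLOAD = {
  "memory_total": 16384,
  "memory_free": 11264,
  "memory_dom0": 1024,
  }


def _ExampleHasEnoughMemory(payload, requested):
  """Mirror the free-memory test above on an already-fetched payload."""
  free_mem = payload.get("memory_free", None)
  return isinstance(free_mem, int) and requested <= free_mem

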
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


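# The req_sizes argument above maps a volume group name to the disk space
# (in MiB) required on it, e.g. when an instance has disks in two VGs. The
# VG names below are hypothetical and only illustrate the expected shape.
_EXAMPLE_REQ_SIZES = {
  "xenvg": 2 * 1024,   # 2 GiB needed in the default VG
  "ssdvg": 512,        # 512 MiB needed in a second VG
  }

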
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has fewer CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    num_cpus = info.payload.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = \
        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                      self.op.beparams),
                                     self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


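# A minimal sketch of the temporary hvparams override checked in
# LUInstanceStartup.CheckPrereq: the cluster-filled parameters are copied and
# then updated with the per-startup overrides before syntax checking. The
# helper name and the dictionaries in the comment are invented for
# illustration.
def _ExampleFillStartupHvParams(cluster_filled_hvp, override_hvp):
  """Return the effective hypervisor parameters for a one-off start."""
  filled = dict(cluster_filled_hvp)   # equivalent of cluster.FillHV(instance)
  filled.update(override_hvp or {})   # one-off overrides win
  return filled

# Example (hypothetical values):
#   _ExampleFillStartupHvParams({"kernel_path": "/boot/vmlinuz", "acpi": True},
#                               {"acpi": False})
#   => {"kernel_path": "/boot/vmlinuz", "acpi": False}

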
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


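# Illustrative decision helper (not used above): LUInstanceReboot.Exec only
# asks the node daemon to reboot in place for soft/hard reboots of a running
# instance; everything else (a full reboot, or a stopped instance) goes
# through the shutdown-disks/start-disks path instead.
def _ExampleRebootsInPlace(instance_running, reboot_type):
  """Return True if the reboot can be delegated to the node daemon."""
  return instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                              constants.INSTANCE_REBOOT_HARD]

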
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node,
                                             (inst, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # normalise the disk list
    self.op.disks = sorted(frozenset(self.op.disks))

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    to_skip = []
    mods = [] # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes and minors
          assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6 # otherwise disk internals
                                           # have changed
          (_, _, old_port, _, _, old_secret) = disk.logical_id
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                    new_minors[0], new_minors[1], old_secret)
          assert len(disk.logical_id) == len(new_id)
          mods.append((idx, new_id))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id in mods:
      instance.disks[idx].logical_id = new_id

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)


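# Sketch of the DRBD8 logical_id handling in LUInstanceRecreateDisks.Exec
# above: the logical_id is a 6-tuple and only the node names and minors
# change when the disks are recreated on new nodes; the port and secret are
# kept. The helper and all values in the comment are invented for
# illustration only.
def _ExampleNewDrbdLogicalId(old_logical_id, new_nodes, new_minors):
  """Rebuild a DRBD logical_id for a pair of replacement nodes."""
  (_, _, old_port, _, _, old_secret) = old_logical_id
  return (new_nodes[0], new_nodes[1], old_port,
          new_minors[0], new_minors[1], old_secret)

# Example (hypothetical values):
#   _ExampleNewDrbdLogicalId(("node1", "node2", 11000, 0, 1, "secret"),
#                            ["node3", "node4"], [2, 3])
#   => ("node3", "node4", 11000, 2, 3, "secret")

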
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (hostname.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


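# Sketch of the file-storage path handling in LUInstanceRename.Exec above:
# for file-based disks the directory component of the first disk's
# logical_id is the per-instance storage directory, which has to be renamed
# together with the instance. The path in the comment is hypothetical.
def _ExampleFileStorageDir(disk_logical_id):
  """Return the instance's file-storage directory for a file-based disk."""
  # logical_id for file-based disks is (file_driver, path)
  return os.path.dirname(disk_logical_id[1])

# Example (hypothetical path):
#   _ExampleFileStorageDir((None, "/srv/ganeti/file-storage/inst1/disk0"))
#   => "/srv/ganeti/file-storage/inst1"

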
class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMove(LogicalUnit):
6935
  """Move an instance by data-copying.
6936

6937
  """
6938
  HPATH = "instance-move"
6939
  HTYPE = constants.HTYPE_INSTANCE
6940
  REQ_BGL = False
6941

    
6942
  def ExpandNames(self):
6943
    self._ExpandAndLockInstance()
6944
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6945
    self.op.target_node = target_node
6946
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6947
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6948

    
6949
  def DeclareLocks(self, level):
6950
    if level == locking.LEVEL_NODE:
6951
      self._LockInstancesNodes(primary_only=True)
6952

    
6953
  def BuildHooksEnv(self):
6954
    """Build hooks env.
6955

6956
    This runs on master, primary and secondary nodes of the instance.
6957

6958
    """
6959
    env = {
6960
      "TARGET_NODE": self.op.target_node,
6961
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6962
      }
6963
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6964
    return env
6965

    
6966
  def BuildHooksNodes(self):
6967
    """Build hooks nodes.
6968

6969
    """
6970
    nl = [
6971
      self.cfg.GetMasterNode(),
6972
      self.instance.primary_node,
6973
      self.op.target_node,
6974
      ]
6975
    return (nl, nl)
6976

    
6977
  def CheckPrereq(self):
6978
    """Check prerequisites.
6979

6980
    This checks that the instance is in the cluster.
6981

6982
    """
6983
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6984
    assert self.instance is not None, \
6985
      "Cannot retrieve locked instance %s" % self.op.instance_name
6986

    
6987
    node = self.cfg.GetNodeInfo(self.op.target_node)
6988
    assert node is not None, \
6989
      "Cannot retrieve locked node %s" % self.op.target_node
6990

    
6991
    self.target_node = target_node = node.name
6992

    
6993
    if target_node == instance.primary_node:
6994
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6995
                                 (instance.name, target_node),
6996
                                 errors.ECODE_STATE)
6997

    
6998
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6999

    
7000
    for idx, dsk in enumerate(instance.disks):
7001
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7002
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7003
                                   " cannot copy" % idx, errors.ECODE_STATE)
7004

    
7005
    _CheckNodeOnline(self, target_node)
7006
    _CheckNodeNotDrained(self, target_node)
7007
    _CheckNodeVmCapable(self, target_node)
7008

    
7009
    if instance.admin_up:
7010
      # check memory requirements on the secondary node
7011
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7012
                           instance.name, bep[constants.BE_MEMORY],
7013
                           instance.hypervisor)
7014
    else:
7015
      self.LogInfo("Not checking memory on the secondary node as"
7016
                   " instance will not be started")
7017

    
7018
    # check bridge existence
7019
    _CheckInstanceBridgesExist(self, instance, node=target_node)
7020

    
7021
  def Exec(self, feedback_fn):
7022
    """Move an instance.
7023

7024
    The move is done by shutting it down on its present node, copying
7025
    the data over (slow) and starting it on the new node.
7026

7027
    """
7028
    instance = self.instance
7029

    
7030
    source_node = instance.primary_node
7031
    target_node = self.target_node
7032

    
7033
    self.LogInfo("Shutting down instance %s on source node %s",
7034
                 instance.name, source_node)
7035

    
7036
    result = self.rpc.call_instance_shutdown(source_node, instance,
7037
                                             self.op.shutdown_timeout)
7038
    msg = result.fail_msg
7039
    if msg:
7040
      if self.op.ignore_consistency:
7041
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
7042
                             " Proceeding anyway. Please make sure node"
7043
                             " %s is down. Error details: %s",
7044
                             instance.name, source_node, source_node, msg)
7045
      else:
7046
        raise errors.OpExecError("Could not shutdown instance %s on"
7047
                                 " node %s: %s" %
7048
                                 (instance.name, source_node, msg))
7049

    
7050
    # create the target disks
7051
    try:
7052
      _CreateDisks(self, instance, target_node=target_node)
7053
    except errors.OpExecError:
7054
      self.LogWarning("Device creation failed, reverting...")
7055
      try:
7056
        _RemoveDisks(self, instance, target_node=target_node)
7057
      finally:
7058
        self.cfg.ReleaseDRBDMinors(instance.name)
7059
        raise
7060

    
7061
    cluster_name = self.cfg.GetClusterInfo().cluster_name
7062

    
7063
    errs = []
7064
    # activate, get path, copy the data over
7065
    for idx, disk in enumerate(instance.disks):
7066
      self.LogInfo("Copying data for disk %d", idx)
7067
      result = self.rpc.call_blockdev_assemble(target_node, disk,
7068
                                               instance.name, True, idx)
7069
      if result.fail_msg:
7070
        self.LogWarning("Can't assemble newly created disk %d: %s",
7071
                        idx, result.fail_msg)
7072
        errs.append(result.fail_msg)
7073
        break
7074
      dev_path = result.payload
7075
      result = self.rpc.call_blockdev_export(source_node, disk,
7076
                                             target_node, dev_path,
7077
                                             cluster_name)
7078
      if result.fail_msg:
7079
        self.LogWarning("Can't copy data over for disk %d: %s",
7080
                        idx, result.fail_msg)
7081
        errs.append(result.fail_msg)
7082
        break
7083

    
7084
    if errs:
7085
      self.LogWarning("Some disks failed to copy, aborting")
7086
      try:
7087
        _RemoveDisks(self, instance, target_node=target_node)
7088
      finally:
7089
        self.cfg.ReleaseDRBDMinors(instance.name)
7090
        raise errors.OpExecError("Errors during disk copy: %s" %
7091
                                 (",".join(errs),))
7092

    
7093
    instance.primary_node = target_node
7094
    self.cfg.Update(instance, feedback_fn)
7095

    
7096
    self.LogInfo("Removing the disks on the original node")
7097
    _RemoveDisks(self, instance, target_node=source_node)
7098

    
7099
    # Only start the instance if it's marked as up
7100
    if instance.admin_up:
7101
      self.LogInfo("Starting instance %s on node %s",
7102
                   instance.name, target_node)
7103

    
7104
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7105
                                           ignore_secondaries=True)
7106
      if not disks_ok:
7107
        _ShutdownInstanceDisks(self, instance)
7108
        raise errors.OpExecError("Can't activate the instance's disks")
7109

    
7110
      result = self.rpc.call_instance_start(target_node,
7111
                                            (instance, None, None), False)
7112
      msg = result.fail_msg
7113
      if msg:
7114
        _ShutdownInstanceDisks(self, instance)
7115
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7116
                                 (instance.name, target_node, msg))
7117

    
7118

    
7119
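# The disk-copy loop in LUInstanceMove.Exec above follows a simple
# pattern: assemble the new disk on the target, export the data from the
# source, and stop at the first failure so that the partially created
# target disks can be removed again.  The helper below is an illustrative
# sketch of that pattern only and is not called anywhere; ``assemble_fn``
# and ``export_fn`` are hypothetical stand-ins for the RPC calls used in
# the LU.
def _ExampleMoveCopyLoop(disks, assemble_fn, export_fn):
  """Sketch of the error-accumulating disk copy loop used by instance move.

  @param disks: sequence of opaque disk objects
  @param assemble_fn: callable(disk) -> (success, dev_path_or_error)
  @param export_fn: callable(disk, dev_path) -> (success, error)
  @return: list of error messages; an empty list means full success

  """
  errs = []
  for idx, disk in enumerate(disks):
    ok, payload = assemble_fn(disk)
    if not ok:
      errs.append("disk %d: assemble failed: %s" % (idx, payload))
      break
    ok, err = export_fn(disk, payload)
    if not ok:
      errs.append("disk %d: copy failed: %s" % (idx, err))
      break
  return errs

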
class LUNodeMigrate(LogicalUnit):
7120
  """Migrate all instances from a node.
7121

7122
  """
7123
  HPATH = "node-migrate"
7124
  HTYPE = constants.HTYPE_NODE
7125
  REQ_BGL = False
7126

    
7127
  def CheckArguments(self):
7128
    pass
7129

    
7130
  def ExpandNames(self):
7131
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7132

    
7133
    self.share_locks = _ShareAll()
7134
    self.needed_locks = {
7135
      locking.LEVEL_NODE: [self.op.node_name],
7136
      }
7137

    
7138
  def BuildHooksEnv(self):
7139
    """Build hooks env.
7140

7141
    This runs on the master, the primary and all the secondaries.
7142

7143
    """
7144
    return {
7145
      "NODE_NAME": self.op.node_name,
7146
      }
7147

    
7148
  def BuildHooksNodes(self):
7149
    """Build hooks nodes.
7150

7151
    """
7152
    nl = [self.cfg.GetMasterNode()]
7153
    return (nl, nl)
7154

    
7155
  def CheckPrereq(self):
7156
    pass
7157

    
7158
  def Exec(self, feedback_fn):
7159
    # Prepare jobs for migration instances
7160
    jobs = [
7161
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7162
                                 mode=self.op.mode,
7163
                                 live=self.op.live,
7164
                                 iallocator=self.op.iallocator,
7165
                                 target_node=self.op.target_node)]
7166
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7167
      ]
7168

    
7169
    # TODO: Run iallocator in this opcode and pass correct placement options to
7170
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7171
    # running the iallocator and the actual migration, a good consistency model
7172
    # will have to be found.
7173

    
7174
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7175
            frozenset([self.op.node_name]))
7176

    
7177
    return ResultWithJobs(jobs)
7178

    
7179

    
7180
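# LUNodeMigrate.Exec above hands back a L{ResultWithJobs} in which every
# primary instance gets its own single-opcode job, so the master daemon
# can schedule the per-instance migrations as independent jobs.  The
# sketch below shows the same shape with plain dictionaries standing in
# for opcodes; the helper and its arguments are illustrative only.
def _ExampleNodeMigrateJobs(instance_names, **migrate_params):
  """Sketch of the jobs structure passed to ResultWithJobs.

  Each inner list is one job; here every job contains exactly one
  opcode-like dict describing the migration of a single instance.

  """
  return [
    [dict(instance_name=name, **migrate_params)]
    for name in instance_names
    ]

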
class TLMigrateInstance(Tasklet):
7181
  """Tasklet class for instance migration.
7182

7183
  @type live: boolean
7184
  @ivar live: whether the migration will be done live or non-live;
7185
      this variable is initialized only after CheckPrereq has run
7186
  @type cleanup: boolean
7187
  @ivar cleanup: Whether we are cleaning up after a failed migration
7188
  @type iallocator: string
7189
  @ivar iallocator: The iallocator used to determine target_node
7190
  @type target_node: string
7191
  @ivar target_node: If given, the target_node to reallocate the instance to
7192
  @type failover: boolean
7193
  @ivar failover: Whether operation results in failover or migration
7194
  @type fallback: boolean
7195
  @ivar fallback: Whether fallback to failover is allowed if migration not
7196
                  possible
7197
  @type ignore_consistency: boolean
7198
  @ivar ignore_consistency: Whether we should ignore consistency between source
7199
                            and target node
7200
  @type shutdown_timeout: int
7201
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7202

7203
  """
7204

    
7205
  # Constants
7206
  _MIGRATION_POLL_INTERVAL = 1      # seconds
7207
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7208

    
7209
  def __init__(self, lu, instance_name, cleanup=False,
7210
               failover=False, fallback=False,
7211
               ignore_consistency=False,
7212
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7213
    """Initializes this class.
7214

7215
    """
7216
    Tasklet.__init__(self, lu)
7217

    
7218
    # Parameters
7219
    self.instance_name = instance_name
7220
    self.cleanup = cleanup
7221
    self.live = False # will be overridden later
7222
    self.failover = failover
7223
    self.fallback = fallback
7224
    self.ignore_consistency = ignore_consistency
7225
    self.shutdown_timeout = shutdown_timeout
7226

    
7227
  def CheckPrereq(self):
7228
    """Check prerequisites.
7229

7230
    This checks that the instance is in the cluster.
7231

7232
    """
7233
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7234
    instance = self.cfg.GetInstanceInfo(instance_name)
7235
    assert instance is not None
7236
    self.instance = instance
7237

    
7238
    if (not self.cleanup and not instance.admin_up and not self.failover and
7239
        self.fallback):
7240
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7241
                      " to failover")
7242
      self.failover = True
7243

    
7244
    if instance.disk_template not in constants.DTS_MIRRORED:
7245
      if self.failover:
7246
        text = "failovers"
7247
      else:
7248
        text = "migrations"
7249
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7250
                                 " %s" % (instance.disk_template, text),
7251
                                 errors.ECODE_STATE)
7252

    
7253
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7254
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7255

    
7256
      if self.lu.op.iallocator:
7257
        self._RunAllocator()
7258
      else:
7259
        # We set self.target_node as it is required by
7260
        # BuildHooksEnv
7261
        self.target_node = self.lu.op.target_node
7262

    
7263
      # self.target_node is already populated, either directly or by the
7264
      # iallocator run
7265
      target_node = self.target_node
7266
      if self.target_node == instance.primary_node:
7267
        raise errors.OpPrereqError("Cannot migrate instance %s"
7268
                                   " to its primary (%s)" %
7269
                                   (instance.name, instance.primary_node))
7270

    
7271
      if len(self.lu.tasklets) == 1:
7272
        # It is safe to release locks only when we're the only tasklet
7273
        # in the LU
7274
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7275
                      keep=[instance.primary_node, self.target_node])
7276

    
7277
    else:
7278
      secondary_nodes = instance.secondary_nodes
7279
      if not secondary_nodes:
7280
        raise errors.ConfigurationError("No secondary node but using"
7281
                                        " %s disk template" %
7282
                                        instance.disk_template)
7283
      target_node = secondary_nodes[0]
7284
      if self.lu.op.iallocator or (self.lu.op.target_node and
7285
                                   self.lu.op.target_node != target_node):
7286
        if self.failover:
7287
          text = "failed over"
7288
        else:
7289
          text = "migrated"
7290
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7291
                                   " be %s to arbitrary nodes"
7292
                                   " (neither an iallocator nor a target"
7293
                                   " node can be passed)" %
7294
                                   (instance.disk_template, text),
7295
                                   errors.ECODE_INVAL)
7296

    
7297
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7298

    
7299
    # check memory requirements on the secondary node
7300
    if not self.failover or instance.admin_up:
7301
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7302
                           instance.name, i_be[constants.BE_MEMORY],
7303
                           instance.hypervisor)
7304
    else:
7305
      self.lu.LogInfo("Not checking memory on the secondary node as"
7306
                      " instance will not be started")
7307

    
7308
    # check bridge existence
7309
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7310

    
7311
    if not self.cleanup:
7312
      _CheckNodeNotDrained(self.lu, target_node)
7313
      if not self.failover:
7314
        result = self.rpc.call_instance_migratable(instance.primary_node,
7315
                                                   instance)
7316
        if result.fail_msg and self.fallback:
7317
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7318
                          " failover")
7319
          self.failover = True
7320
        else:
7321
          result.Raise("Can't migrate, please use failover",
7322
                       prereq=True, ecode=errors.ECODE_STATE)
7323

    
7324
    assert not (self.failover and self.cleanup)
7325

    
7326
    if not self.failover:
7327
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7328
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7329
                                   " parameters are accepted",
7330
                                   errors.ECODE_INVAL)
7331
      if self.lu.op.live is not None:
7332
        if self.lu.op.live:
7333
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7334
        else:
7335
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7336
        # reset the 'live' parameter to None so that repeated
7337
        # invocations of CheckPrereq do not raise an exception
7338
        self.lu.op.live = None
7339
      elif self.lu.op.mode is None:
7340
        # read the default value from the hypervisor
7341
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7342
                                                skip_globals=False)
7343
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7344

    
7345
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7346
    else:
7347
      # Failover is never live
7348
      self.live = False
7349

    
7350
  def _RunAllocator(self):
7351
    """Run the allocator based on input opcode.
7352

7353
    """
7354
    ial = IAllocator(self.cfg, self.rpc,
7355
                     mode=constants.IALLOCATOR_MODE_RELOC,
7356
                     name=self.instance_name,
7357
                     # TODO See why hail breaks with a single node below
7358
                     relocate_from=[self.instance.primary_node,
7359
                                    self.instance.primary_node],
7360
                     )
7361

    
7362
    ial.Run(self.lu.op.iallocator)
7363

    
7364
    if not ial.success:
7365
      raise errors.OpPrereqError("Can't compute nodes using"
7366
                                 " iallocator '%s': %s" %
7367
                                 (self.lu.op.iallocator, ial.info),
7368
                                 errors.ECODE_NORES)
7369
    if len(ial.result) != ial.required_nodes:
7370
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7371
                                 " of nodes (%s), required %s" %
7372
                                 (self.lu.op.iallocator, len(ial.result),
7373
                                  ial.required_nodes), errors.ECODE_FAULT)
7374
    self.target_node = ial.result[0]
7375
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7376
                 self.instance_name, self.lu.op.iallocator,
7377
                 utils.CommaJoin(ial.result))
7378

    
7379
  def _WaitUntilSync(self):
7380
    """Poll with custom rpc for disk sync.
7381

7382
    This uses our own step-based rpc call.
7383

7384
    """
7385
    self.feedback_fn("* wait until resync is done")
7386
    all_done = False
7387
    while not all_done:
7388
      all_done = True
7389
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7390
                                            self.nodes_ip,
7391
                                            self.instance.disks)
7392
      min_percent = 100
7393
      for node, nres in result.items():
7394
        nres.Raise("Cannot resync disks on node %s" % node)
7395
        node_done, node_percent = nres.payload
7396
        all_done = all_done and node_done
7397
        if node_percent is not None:
7398
          min_percent = min(min_percent, node_percent)
7399
      if not all_done:
7400
        if min_percent < 100:
7401
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7402
        time.sleep(2)
7403

    
7404
  def _EnsureSecondary(self, node):
7405
    """Demote a node to secondary.
7406

7407
    """
7408
    self.feedback_fn("* switching node %s to secondary mode" % node)
7409

    
7410
    for dev in self.instance.disks:
7411
      self.cfg.SetDiskID(dev, node)
7412

    
7413
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7414
                                          self.instance.disks)
7415
    result.Raise("Cannot change disk to secondary on node %s" % node)
7416

    
7417
  def _GoStandalone(self):
7418
    """Disconnect from the network.
7419

7420
    """
7421
    self.feedback_fn("* changing into standalone mode")
7422
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7423
                                               self.instance.disks)
7424
    for node, nres in result.items():
7425
      nres.Raise("Cannot disconnect disks node %s" % node)
7426

    
7427
  def _GoReconnect(self, multimaster):
7428
    """Reconnect to the network.
7429

7430
    """
7431
    if multimaster:
7432
      msg = "dual-master"
7433
    else:
7434
      msg = "single-master"
7435
    self.feedback_fn("* changing disks into %s mode" % msg)
7436
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7437
                                           self.instance.disks,
7438
                                           self.instance.name, multimaster)
7439
    for node, nres in result.items():
7440
      nres.Raise("Cannot change disks config on node %s" % node)
7441

    
7442
  def _ExecCleanup(self):
7443
    """Try to cleanup after a failed migration.
7444

7445
    The cleanup is done by:
7446
      - check that the instance is running only on one node
7447
        (and update the config if needed)
7448
      - change disks on its secondary node to secondary
7449
      - wait until disks are fully synchronized
7450
      - disconnect from the network
7451
      - change disks into single-master mode
7452
      - wait again until disks are fully synchronized
7453

7454
    """
7455
    instance = self.instance
7456
    target_node = self.target_node
7457
    source_node = self.source_node
7458

    
7459
    # check running on only one node
7460
    self.feedback_fn("* checking where the instance actually runs"
7461
                     " (if this hangs, the hypervisor might be in"
7462
                     " a bad state)")
7463
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7464
    for node, result in ins_l.items():
7465
      result.Raise("Can't contact node %s" % node)
7466

    
7467
    runningon_source = instance.name in ins_l[source_node].payload
7468
    runningon_target = instance.name in ins_l[target_node].payload
7469

    
7470
    if runningon_source and runningon_target:
7471
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7472
                               " or the hypervisor is confused; you will have"
7473
                               " to ensure manually that it runs only on one"
7474
                               " and restart this operation")
7475

    
7476
    if not (runningon_source or runningon_target):
7477
      raise errors.OpExecError("Instance does not seem to be running at all;"
7478
                               " in this case it's safer to repair by"
7479
                               " running 'gnt-instance stop' to ensure disk"
7480
                               " shutdown, and then restarting it")
7481

    
7482
    if runningon_target:
7483
      # the migration has actually succeeded, we need to update the config
7484
      self.feedback_fn("* instance running on secondary node (%s),"
7485
                       " updating config" % target_node)
7486
      instance.primary_node = target_node
7487
      self.cfg.Update(instance, self.feedback_fn)
7488
      demoted_node = source_node
7489
    else:
7490
      self.feedback_fn("* instance confirmed to be running on its"
7491
                       " primary node (%s)" % source_node)
7492
      demoted_node = target_node
7493

    
7494
    if instance.disk_template in constants.DTS_INT_MIRROR:
7495
      self._EnsureSecondary(demoted_node)
7496
      try:
7497
        self._WaitUntilSync()
7498
      except errors.OpExecError:
7499
        # we ignore errors here, since if the device is standalone, it
7500
        # won't be able to sync
7501
        pass
7502
      self._GoStandalone()
7503
      self._GoReconnect(False)
7504
      self._WaitUntilSync()
7505

    
7506
    self.feedback_fn("* done")
7507

    
7508
  def _RevertDiskStatus(self):
7509
    """Try to revert the disk status after a failed migration.
7510

7511
    """
7512
    target_node = self.target_node
7513
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7514
      return
7515

    
7516
    try:
7517
      self._EnsureSecondary(target_node)
7518
      self._GoStandalone()
7519
      self._GoReconnect(False)
7520
      self._WaitUntilSync()
7521
    except errors.OpExecError, err:
7522
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7523
                         " please try to recover the instance manually;"
7524
                         " error '%s'" % str(err))
7525

    
7526
  def _AbortMigration(self):
7527
    """Call the hypervisor code to abort a started migration.
7528

7529
    """
7530
    instance = self.instance
7531
    target_node = self.target_node
7532
    source_node = self.source_node
7533
    migration_info = self.migration_info
7534

    
7535
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7536
                                                                 instance,
7537
                                                                 migration_info,
7538
                                                                 False)
7539
    abort_msg = abort_result.fail_msg
7540
    if abort_msg:
7541
      logging.error("Aborting migration failed on target node %s: %s",
7542
                    target_node, abort_msg)
7543
      # Don't raise an exception here, as we still have to try to revert the
7544
      # disk status, even if this step failed.
7545

    
7546
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7547
        instance, False, self.live)
7548
    abort_msg = abort_result.fail_msg
7549
    if abort_msg:
7550
      logging.error("Aborting migration failed on source node %s: %s",
7551
                    source_node, abort_msg)
7552

    
7553
  def _ExecMigration(self):
7554
    """Migrate an instance.
7555

7556
    The migration is done by:
7557
      - change the disks into dual-master mode
7558
      - wait until disks are fully synchronized again
7559
      - migrate the instance
7560
      - change disks on the new secondary node (the old primary) to secondary
7561
      - wait until disks are fully synchronized
7562
      - change disks into single-master mode
7563

7564
    """
7565
    instance = self.instance
7566
    target_node = self.target_node
7567
    source_node = self.source_node
7568

    
7569
    # Check for hypervisor version mismatch and warn the user.
7570
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7571
                                       None, self.instance.hypervisor)
7572
    src_info = nodeinfo[source_node]
7573
    dst_info = nodeinfo[target_node]
7574

    
7575
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7576
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7577
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7578
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7579
      if src_version != dst_version:
7580
        self.feedback_fn("* warning: hypervisor version mismatch between"
7581
                         " source (%s) and target (%s) node" %
7582
                         (src_version, dst_version))
7583

    
7584
    self.feedback_fn("* checking disk consistency between source and target")
7585
    for dev in instance.disks:
7586
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7587
        raise errors.OpExecError("Disk %s is degraded or not fully"
7588
                                 " synchronized on target node,"
7589
                                 " aborting migration" % dev.iv_name)
7590

    
7591
    # First get the migration information from the remote node
7592
    result = self.rpc.call_migration_info(source_node, instance)
7593
    msg = result.fail_msg
7594
    if msg:
7595
      log_err = ("Failed fetching source migration information from %s: %s" %
7596
                 (source_node, msg))
7597
      logging.error(log_err)
7598
      raise errors.OpExecError(log_err)
7599

    
7600
    self.migration_info = migration_info = result.payload
7601

    
7602
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7603
      # Then switch the disks to master/master mode
7604
      self._EnsureSecondary(target_node)
7605
      self._GoStandalone()
7606
      self._GoReconnect(True)
7607
      self._WaitUntilSync()
7608

    
7609
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7610
    result = self.rpc.call_accept_instance(target_node,
7611
                                           instance,
7612
                                           migration_info,
7613
                                           self.nodes_ip[target_node])
7614

    
7615
    msg = result.fail_msg
7616
    if msg:
7617
      logging.error("Instance pre-migration failed, trying to revert"
7618
                    " disk status: %s", msg)
7619
      self.feedback_fn("Pre-migration failed, aborting")
7620
      self._AbortMigration()
7621
      self._RevertDiskStatus()
7622
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7623
                               (instance.name, msg))
7624

    
7625
    self.feedback_fn("* migrating instance to %s" % target_node)
7626
    result = self.rpc.call_instance_migrate(source_node, instance,
7627
                                            self.nodes_ip[target_node],
7628
                                            self.live)
7629
    msg = result.fail_msg
7630
    if msg:
7631
      logging.error("Instance migration failed, trying to revert"
7632
                    " disk status: %s", msg)
7633
      self.feedback_fn("Migration failed, aborting")
7634
      self._AbortMigration()
7635
      self._RevertDiskStatus()
7636
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7637
                               (instance.name, msg))
7638

    
7639
    self.feedback_fn("* starting memory transfer")
7640
    last_feedback = time.time()
7641
    while True:
7642
      result = self.rpc.call_instance_get_migration_status(source_node,
7643
                                                           instance)
7644
      msg = result.fail_msg
7645
      ms = result.payload   # MigrationStatus instance
7646
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7647
        logging.error("Instance migration failed, trying to revert"
7648
                      " disk status: %s", msg)
7649
        self.feedback_fn("Migration failed, aborting")
7650
        self._AbortMigration()
7651
        self._RevertDiskStatus()
7652
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7653
                                 (instance.name, msg))
7654

    
7655
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7656
        self.feedback_fn("* memory transfer complete")
7657
        break
7658

    
7659
      if (utils.TimeoutExpired(last_feedback,
7660
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7661
          ms.transferred_ram is not None):
7662
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7663
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7664
        last_feedback = time.time()
7665

    
7666
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7667

    
7668
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7669
                                                           instance,
7670
                                                           True,
7671
                                                           self.live)
7672
    msg = result.fail_msg
7673
    if msg:
7674
      logging.error("Instance migration succeeded, but finalization failed"
7675
                    " on the source node: %s", msg)
7676
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7677
                               msg)
7678

    
7679
    instance.primary_node = target_node
7680

    
7681
    # distribute new instance config to the other nodes
7682
    self.cfg.Update(instance, self.feedback_fn)
7683

    
7684
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7685
                                                           instance,
7686
                                                           migration_info,
7687
                                                           True)
7688
    msg = result.fail_msg
7689
    if msg:
7690
      logging.error("Instance migration succeeded, but finalization failed"
7691
                    " on the target node: %s", msg)
7692
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7693
                               msg)
7694

    
7695
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7696
      self._EnsureSecondary(source_node)
7697
      self._WaitUntilSync()
7698
      self._GoStandalone()
7699
      self._GoReconnect(False)
7700
      self._WaitUntilSync()
7701

    
7702
    self.feedback_fn("* done")
7703

    
7704
  def _ExecFailover(self):
7705
    """Failover an instance.
7706

7707
    The failover is done by shutting it down on its present node and
7708
    starting it on the secondary.
7709

7710
    """
7711
    instance = self.instance
7712
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7713

    
7714
    source_node = instance.primary_node
7715
    target_node = self.target_node
7716

    
7717
    if instance.admin_up:
7718
      self.feedback_fn("* checking disk consistency between source and target")
7719
      for dev in instance.disks:
7720
        # for drbd, these are drbd over lvm
7721
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7722
          if primary_node.offline:
7723
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7724
                             " target node %s" %
7725
                             (primary_node.name, dev.iv_name, target_node))
7726
          elif not self.ignore_consistency:
7727
            raise errors.OpExecError("Disk %s is degraded on target node,"
7728
                                     " aborting failover" % dev.iv_name)
7729
    else:
7730
      self.feedback_fn("* not checking disk consistency as instance is not"
7731
                       " running")
7732

    
7733
    self.feedback_fn("* shutting down instance on source node")
7734
    logging.info("Shutting down instance %s on node %s",
7735
                 instance.name, source_node)
7736

    
7737
    result = self.rpc.call_instance_shutdown(source_node, instance,
7738
                                             self.shutdown_timeout)
7739
    msg = result.fail_msg
7740
    if msg:
7741
      if self.ignore_consistency or primary_node.offline:
7742
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7743
                           " proceeding anyway; please make sure node"
7744
                           " %s is down; error details: %s",
7745
                           instance.name, source_node, source_node, msg)
7746
      else:
7747
        raise errors.OpExecError("Could not shutdown instance %s on"
7748
                                 " node %s: %s" %
7749
                                 (instance.name, source_node, msg))
7750

    
7751
    self.feedback_fn("* deactivating the instance's disks on source node")
7752
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7753
      raise errors.OpExecError("Can't shut down the instance's disks")
7754

    
7755
    instance.primary_node = target_node
7756
    # distribute new instance config to the other nodes
7757
    self.cfg.Update(instance, self.feedback_fn)
7758

    
7759
    # Only start the instance if it's marked as up
7760
    if instance.admin_up:
7761
      self.feedback_fn("* activating the instance's disks on target node %s" %
7762
                       target_node)
7763
      logging.info("Starting instance %s on node %s",
7764
                   instance.name, target_node)
7765

    
7766
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7767
                                           ignore_secondaries=True)
7768
      if not disks_ok:
7769
        _ShutdownInstanceDisks(self.lu, instance)
7770
        raise errors.OpExecError("Can't activate the instance's disks")
7771

    
7772
      self.feedback_fn("* starting the instance on the target node %s" %
7773
                       target_node)
7774
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
7775
                                            False)
7776
      msg = result.fail_msg
7777
      if msg:
7778
        _ShutdownInstanceDisks(self.lu, instance)
7779
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7780
                                 (instance.name, target_node, msg))
7781

    
7782
  def Exec(self, feedback_fn):
7783
    """Perform the migration.
7784

7785
    """
7786
    self.feedback_fn = feedback_fn
7787
    self.source_node = self.instance.primary_node
7788

    
7789
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7790
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7791
      self.target_node = self.instance.secondary_nodes[0]
7792
      # Otherwise self.target_node has been populated either
7793
      # directly, or through an iallocator.
7794

    
7795
    self.all_nodes = [self.source_node, self.target_node]
7796
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7797
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7798

    
7799
    if self.failover:
7800
      feedback_fn("Failover instance %s" % self.instance.name)
7801
      self._ExecFailover()
7802
    else:
7803
      feedback_fn("Migrating instance %s" % self.instance.name)
7804

    
7805
      if self.cleanup:
7806
        return self._ExecCleanup()
7807
      else:
7808
        return self._ExecMigration()
7809

    
7810

    
7811
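# The memory-transfer loop in TLMigrateInstance._ExecMigration polls the
# hypervisor every _MIGRATION_POLL_INTERVAL seconds and, at most every
# _MIGRATION_FEEDBACK_INTERVAL seconds, reports the share of RAM already
# transferred.  The helper below isolates that progress arithmetic; it is
# an illustrative sketch and is not used by the tasklet.
def _ExampleMigrationProgress(transferred_ram, total_ram):
  """Return the memory-transfer progress as a percentage.

  @param transferred_ram: amount of RAM already transferred (any unit)
  @param total_ram: total amount of RAM to transfer, in the same unit
  @return: progress as a float between 0 and 100

  """
  return 100 * float(transferred_ram) / float(total_ram)

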
def _CreateBlockDev(lu, node, instance, device, force_create,
7812
                    info, force_open):
7813
  """Create a tree of block devices on a given node.
7814

7815
  If this device type has to be created on secondaries, create it and
7816
  all its children.
7817

7818
  If not, just recurse to children keeping the same 'force' value.
7819

7820
  @param lu: the lu on whose behalf we execute
7821
  @param node: the node on which to create the device
7822
  @type instance: L{objects.Instance}
7823
  @param instance: the instance which owns the device
7824
  @type device: L{objects.Disk}
7825
  @param device: the device to create
7826
  @type force_create: boolean
7827
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device for which
      CreateOnSecondary() returns True
7830
  @param info: the extra 'metadata' we should attach to the device
7831
      (this will be represented as a LVM tag)
7832
  @type force_open: boolean
7833
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7837

7838
  """
7839
  if device.CreateOnSecondary():
7840
    force_create = True
7841

    
7842
  if device.children:
7843
    for child in device.children:
7844
      _CreateBlockDev(lu, node, instance, child, force_create,
7845
                      info, force_open)
7846

    
7847
  if not force_create:
7848
    return
7849

    
7850
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7851

    
7852

    
7853
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7854
  """Create a single block device on a given node.
7855

7856
  This will not recurse over children of the device, so they must be
7857
  created in advance.
7858

7859
  @param lu: the lu on whose behalf we execute
7860
  @param node: the node on which to create the device
7861
  @type instance: L{objects.Instance}
7862
  @param instance: the instance which owns the device
7863
  @type device: L{objects.Disk}
7864
  @param device: the device to create
7865
  @param info: the extra 'metadata' we should attach to the device
7866
      (this will be represented as a LVM tag)
7867
  @type force_open: boolean
7868
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7872

7873
  """
7874
  lu.cfg.SetDiskID(device, node)
7875
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7876
                                       instance.name, force_open, info)
7877
  result.Raise("Can't create block device %s on"
7878
               " node %s for instance %s" % (device, node, instance.name))
7879
  if device.physical_id is None:
7880
    device.physical_id = result.payload
7881

    
7882

    
7883
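# _CreateBlockDev above walks the disk tree depth-first: children are
# always visited, and force_create is switched on as soon as a device
# reports CreateOnSecondary(), so everything below such a device is
# created even on secondary nodes, while each device itself is only
# created once force_create is set.  The sketch below mirrors that
# control flow on a plain object tree; ``_ExampleDev`` and ``create_fn``
# are hypothetical stand-ins, not part of this module's API.
class _ExampleDev(object):
  """Minimal device node for the creation sketch below."""

  def __init__(self, name, create_on_secondary=False, children=None):
    self.name = name
    self.create_on_secondary = create_on_secondary
    self.children = children or []


def _ExampleCreateTree(dev, force_create, create_fn):
  """Depth-first creation mirroring _CreateBlockDev's force_create logic."""
  if dev.create_on_secondary:
    force_create = True
  for child in dev.children:
    _ExampleCreateTree(child, force_create, create_fn)
  if force_create:
    create_fn(dev)

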
def _GenerateUniqueNames(lu, exts):
7884
  """Generate a suitable LV name.
7885

7886
  This will generate a logical volume name for the given instance.
7887

7888
  """
7889
  results = []
7890
  for val in exts:
7891
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7892
    results.append("%s%s" % (new_id, val))
7893
  return results
7894

    
7895

    
7896
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7897
                         iv_name, p_minor, s_minor):
7898
  """Generate a drbd8 device complete with its children.
7899

7900
  """
7901
  assert len(vgnames) == len(names) == 2
7902
  port = lu.cfg.AllocatePort()
7903
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7904
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7905
                          logical_id=(vgnames[0], names[0]))
7906
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
7907
                          logical_id=(vgnames[1], names[1]))
7908
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7909
                          logical_id=(primary, secondary, port,
7910
                                      p_minor, s_minor,
7911
                                      shared_secret),
7912
                          children=[dev_data, dev_meta],
7913
                          iv_name=iv_name)
7914
  return drbd_dev
7915

    
7916

    
7917
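# _GenerateDRBD8Branch above always returns the same two-level tree: a
# DRBD8 device whose children are the data LV (of the requested size) and
# a metadata LV of DRBD_META_SIZE MB.  The sketch below shows that shape
# as plain dictionaries for a hypothetical 10 GiB disk; it is
# illustrative only and does not build real objects.Disk instances.
def _ExampleDrbd8Layout(size=10240):
  """Return a dict mirroring the disk tree built by _GenerateDRBD8Branch.

  @param size: size of the data device in MiB (hypothetical default)

  """
  data_lv = {"role": "data", "size": size}
  meta_lv = {"role": "meta", "size": DRBD_META_SIZE}
  return {
    "role": "drbd8",
    "size": size,
    "children": [data_lv, meta_lv],
    }

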
def _GenerateDiskTemplate(lu, template_name,
7918
                          instance_name, primary_node,
7919
                          secondary_nodes, disk_info,
7920
                          file_storage_dir, file_driver,
7921
                          base_index, feedback_fn):
7922
  """Generate the entire disk layout for a given template type.
7923

7924
  """
7925
  #TODO: compute space requirements
7926

    
7927
  vgname = lu.cfg.GetVGName()
7928
  disk_count = len(disk_info)
7929
  disks = []
7930
  if template_name == constants.DT_DISKLESS:
7931
    pass
7932
  elif template_name == constants.DT_PLAIN:
7933
    if len(secondary_nodes) != 0:
7934
      raise errors.ProgrammerError("Wrong template configuration")
7935

    
7936
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7937
                                      for i in range(disk_count)])
7938
    for idx, disk in enumerate(disk_info):
7939
      disk_index = idx + base_index
7940
      vg = disk.get(constants.IDISK_VG, vgname)
7941
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7942
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7943
                              size=disk[constants.IDISK_SIZE],
7944
                              logical_id=(vg, names[idx]),
7945
                              iv_name="disk/%d" % disk_index,
7946
                              mode=disk[constants.IDISK_MODE])
7947
      disks.append(disk_dev)
7948
  elif template_name == constants.DT_DRBD8:
7949
    if len(secondary_nodes) != 1:
7950
      raise errors.ProgrammerError("Wrong template configuration")
7951
    remote_node = secondary_nodes[0]
7952
    minors = lu.cfg.AllocateDRBDMinor(
7953
      [primary_node, remote_node] * len(disk_info), instance_name)
7954

    
7955
    names = []
7956
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7957
                                               for i in range(disk_count)]):
7958
      names.append(lv_prefix + "_data")
7959
      names.append(lv_prefix + "_meta")
7960
    for idx, disk in enumerate(disk_info):
7961
      disk_index = idx + base_index
7962
      data_vg = disk.get(constants.IDISK_VG, vgname)
7963
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7964
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7965
                                      disk[constants.IDISK_SIZE],
7966
                                      [data_vg, meta_vg],
7967
                                      names[idx * 2:idx * 2 + 2],
7968
                                      "disk/%d" % disk_index,
7969
                                      minors[idx * 2], minors[idx * 2 + 1])
7970
      disk_dev.mode = disk[constants.IDISK_MODE]
7971
      disks.append(disk_dev)
7972
  elif template_name == constants.DT_FILE:
7973
    if len(secondary_nodes) != 0:
7974
      raise errors.ProgrammerError("Wrong template configuration")
7975

    
7976
    opcodes.RequireFileStorage()
7977

    
7978
    for idx, disk in enumerate(disk_info):
7979
      disk_index = idx + base_index
7980
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7981
                              size=disk[constants.IDISK_SIZE],
7982
                              iv_name="disk/%d" % disk_index,
7983
                              logical_id=(file_driver,
7984
                                          "%s/disk%d" % (file_storage_dir,
7985
                                                         disk_index)),
7986
                              mode=disk[constants.IDISK_MODE])
7987
      disks.append(disk_dev)
7988
  elif template_name == constants.DT_SHARED_FILE:
7989
    if len(secondary_nodes) != 0:
7990
      raise errors.ProgrammerError("Wrong template configuration")
7991

    
7992
    opcodes.RequireSharedFileStorage()
7993

    
7994
    for idx, disk in enumerate(disk_info):
7995
      disk_index = idx + base_index
7996
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7997
                              size=disk[constants.IDISK_SIZE],
7998
                              iv_name="disk/%d" % disk_index,
7999
                              logical_id=(file_driver,
8000
                                          "%s/disk%d" % (file_storage_dir,
8001
                                                         disk_index)),
8002
                              mode=disk[constants.IDISK_MODE])
8003
      disks.append(disk_dev)
8004
  elif template_name == constants.DT_BLOCK:
8005
    if len(secondary_nodes) != 0:
8006
      raise errors.ProgrammerError("Wrong template configuration")
8007

    
8008
    for idx, disk in enumerate(disk_info):
8009
      disk_index = idx + base_index
8010
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8011
                              size=disk[constants.IDISK_SIZE],
8012
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8013
                                          disk[constants.IDISK_ADOPT]),
8014
                              iv_name="disk/%d" % disk_index,
8015
                              mode=disk[constants.IDISK_MODE])
8016
      disks.append(disk_dev)
8017

    
8018
  else:
8019
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8020
  return disks
8021

    
8022

    
8023
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
8031
  """Calculates the ETA based on size written and total size.
8032

8033
  @param time_taken: The time taken so far
8034
  @param written: amount written so far
8035
  @param total_size: The total size of data to be written
8036
  @return: The remaining time in seconds
8037

8038
  """
8039
  avg_time = time_taken / float(written)
8040
  return (total_size - written) * avg_time
8041

    
8042

    
8043
def _WipeDisks(lu, instance):
8044
  """Wipes instance disks.
8045

8046
  @type lu: L{LogicalUnit}
8047
  @param lu: the logical unit on whose behalf we execute
8048
  @type instance: L{objects.Instance}
8049
  @param instance: the instance whose disks we should create
8050
  @return: the success of the wipe
8051

8052
  """
8053
  node = instance.primary_node
8054

    
8055
  for device in instance.disks:
8056
    lu.cfg.SetDiskID(device, node)
8057

    
8058
  logging.info("Pause sync of instance %s disks", instance.name)
8059
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8060

    
8061
  for idx, success in enumerate(result.payload):
8062
    if not success:
8063
      logging.warn("pause-sync of instance %s for disks %d failed",
8064
                   instance.name, idx)
8065

    
8066
  try:
8067
    for idx, device in enumerate(instance.disks):
8068
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk,
      # but at most MAX_WIPE_CHUNK
8070
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8071
                            constants.MIN_WIPE_CHUNK_PERCENT)
8072
      # we _must_ make this an int, otherwise rounding errors will
8073
      # occur
8074
      wipe_chunk_size = int(wipe_chunk_size)
8075

    
8076
      lu.LogInfo("* Wiping disk %d", idx)
8077
      logging.info("Wiping disk %d for instance %s, node %s using"
8078
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8079

    
8080
      offset = 0
8081
      size = device.size
8082
      last_output = 0
8083
      start_time = time.time()
8084

    
8085
      while offset < size:
8086
        wipe_size = min(wipe_chunk_size, size - offset)
8087
        logging.debug("Wiping disk %d, offset %s, chunk %s",
8088
                      idx, offset, wipe_size)
8089
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8090
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
8091
                     (idx, offset, wipe_size))
8092
        now = time.time()
8093
        offset += wipe_size
8094
        if now - last_output >= 60:
8095
          eta = _CalcEta(now - start_time, offset, size)
8096
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
8097
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
8098
          last_output = now
8099
  finally:
8100
    logging.info("Resume sync of instance %s disks", instance.name)
8101

    
8102
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8103

    
8104
    for idx, success in enumerate(result.payload):
8105
      if not success:
8106
        lu.LogWarning("Resume sync of disk %d failed, please have a"
8107
                      " look at the status and troubleshoot the issue", idx)
8108
        logging.warn("resume-sync of instance %s for disks %d failed",
8109
                     instance.name, idx)
8110

    
8111

    
8112
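# The wipe loop in _WipeDisks above derives its chunk size from the disk
# size: MIN_WIPE_CHUNK_PERCENT percent of the disk, capped at
# MAX_WIPE_CHUNK and truncated to an integer, and then walks the disk in
# offset/length steps.  The helper below sketches that computation with
# explicit arguments so it does not depend on the cluster constants; the
# default percentage and cap are hypothetical values, not the ones used
# by _WipeDisks.
def _ExampleWipeOffsets(disk_size, chunk_percent=10, max_chunk=1024):
  """Return (chunk_size, offsets) for a _WipeDisks-style wipe loop.

  @param disk_size: disk size in MiB
  @param chunk_percent: percentage of the disk wiped per RPC call
  @param max_chunk: upper bound on the chunk size, in MiB

  """
  # guard against a zero-sized chunk for very small disks (sketch only)
  chunk_size = max(1, int(min(max_chunk, disk_size / 100.0 * chunk_percent)))
  offsets = []
  offset = 0
  while offset < disk_size:
    wipe_size = min(chunk_size, disk_size - offset)
    offsets.append((offset, wipe_size))
    offset += wipe_size
  return (chunk_size, offsets)

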
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8113
  """Create all disks for an instance.
8114

8115
  This abstracts away some work from AddInstance.
8116

8117
  @type lu: L{LogicalUnit}
8118
  @param lu: the logical unit on whose behalf we execute
8119
  @type instance: L{objects.Instance}
8120
  @param instance: the instance whose disks we should create
8121
  @type to_skip: list
8122
  @param to_skip: list of indices to skip
8123
  @type target_node: string
8124
  @param target_node: if passed, overrides the target node for creation
8125
  @rtype: boolean
8126
  @return: the success of the creation
8127

8128
  """
8129
  info = _GetInstanceInfoText(instance)
8130
  if target_node is None:
8131
    pnode = instance.primary_node
8132
    all_nodes = instance.all_nodes
8133
  else:
8134
    pnode = target_node
8135
    all_nodes = [pnode]
8136

    
8137
  if instance.disk_template in constants.DTS_FILEBASED:
8138
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8139
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8140

    
8141
    result.Raise("Failed to create directory '%s' on"
8142
                 " node %s" % (file_storage_dir, pnode))
8143

    
8144
  # Note: this needs to be kept in sync with adding of disks in
8145
  # LUInstanceSetParams
8146
  for idx, device in enumerate(instance.disks):
8147
    if to_skip and idx in to_skip:
8148
      continue
8149
    logging.info("Creating volume %s for instance %s",
8150
                 device.iv_name, instance.name)
8151
    #HARDCODE
8152
    for node in all_nodes:
8153
      f_create = node == pnode
8154
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8155

    
8156

    
8157
def _RemoveDisks(lu, instance, target_node=None):
8158
  """Remove all disks for an instance.
8159

8160
  This abstracts away some work from `AddInstance()` and
8161
  `RemoveInstance()`. Note that in case some of the devices couldn't
8162
  be removed, the removal will continue with the other ones (compare
8163
  with `_CreateDisks()`).
8164

8165
  @type lu: L{LogicalUnit}
8166
  @param lu: the logical unit on whose behalf we execute
8167
  @type instance: L{objects.Instance}
8168
  @param instance: the instance whose disks we should remove
8169
  @type target_node: string
8170
  @param target_node: used to override the node on which to remove the disks
8171
  @rtype: boolean
8172
  @return: the success of the removal
8173

8174
  """
8175
  logging.info("Removing block devices for instance %s", instance.name)
8176

    
8177
  all_result = True
8178
  for device in instance.disks:
8179
    if target_node:
8180
      edata = [(target_node, device)]
8181
    else:
8182
      edata = device.ComputeNodeTree(instance.primary_node)
8183
    for node, disk in edata:
8184
      lu.cfg.SetDiskID(disk, node)
8185
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8186
      if msg:
8187
        lu.LogWarning("Could not remove block device %s on node %s,"
8188
                      " continuing anyway: %s", device.iv_name, node, msg)
8189
        all_result = False
8190

    
8191
  if instance.disk_template == constants.DT_FILE:
8192
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8193
    if target_node:
8194
      tgt = target_node
8195
    else:
8196
      tgt = instance.primary_node
8197
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8198
    if result.fail_msg:
8199
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8200
                    file_storage_dir, instance.primary_node, result.fail_msg)
8201
      all_result = False
8202

    
8203
  return all_result
8204

    
8205

    
8206
def _ComputeDiskSizePerVG(disk_template, disks):
8207
  """Compute disk size requirements in the volume group
8208

8209
  """
8210
  def _compute(disks, payload):
8211
    """Universal algorithm.
8212

8213
    """
8214
    vgs = {}
8215
    for disk in disks:
8216
      vg_name = disk[constants.IDISK_VG]
      vgs[vg_name] = \
        vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload
8218

    
8219
    return vgs
8220

    
8221
  # Required free disk space as a function of disk and swap space
8222
  req_size_dict = {
8223
    constants.DT_DISKLESS: {},
8224
    constants.DT_PLAIN: _compute(disks, 0),
8225
    # 128 MB are added for drbd metadata for each disk
8226
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8227
    constants.DT_FILE: {},
8228
    constants.DT_SHARED_FILE: {},
8229
  }
8230

    
8231
  if disk_template not in req_size_dict:
8232
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8233
                                 " is unknown" % disk_template)
8234

    
8235
  return req_size_dict[disk_template]
8236

    
8237

    
8238
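# _ComputeDiskSizePerVG above sums the requested disk sizes per volume
# group, adding a per-disk overhead of DRBD_META_SIZE for the drbd8
# template.  Worked example with hypothetical VG names and sizes: two
# 1024 MiB disks in "xenvg" and one 512 MiB disk in "fastvg" require
# 2 * (1024 + DRBD_META_SIZE) MiB in "xenvg" and 512 + DRBD_META_SIZE MiB
# in "fastvg".  The helper below just returns that result for reference.
def _ExamplePerVgRequirements():
  """Return the per-VG requirement dict for the drbd8 example above."""
  return {
    "xenvg": 2 * (1024 + DRBD_META_SIZE),
    "fastvg": 512 + DRBD_META_SIZE,
    }

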
def _ComputeDiskSize(disk_template, disks):
8239
  """Compute disk size requirements in the volume group
8240

8241
  """
8242
  # Required free disk space as a function of disk and swap space
8243
  req_size_dict = {
8244
    constants.DT_DISKLESS: None,
8245
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8246
    # 128 MB are added for drbd metadata for each disk
8247
    constants.DT_DRBD8:
8248
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8249
    constants.DT_FILE: None,
8250
    constants.DT_SHARED_FILE: 0,
8251
    constants.DT_BLOCK: 0,
8252
  }
8253

    
8254
  if disk_template not in req_size_dict:
8255
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8256
                                 " is unknown" % disk_template)
8257

    
8258
  return req_size_dict[disk_template]
8259

    
8260

    
8261
def _FilterVmNodes(lu, nodenames):
8262
  """Filters out non-vm_capable nodes from a list.
8263

8264
  @type lu: L{LogicalUnit}
8265
  @param lu: the logical unit for which we check
8266
  @type nodenames: list
8267
  @param nodenames: the list of nodes on which we should check
8268
  @rtype: list
8269
  @return: the list of vm-capable nodes
8270

8271
  """
8272
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]
8274

    
8275

    
8276
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8277
  """Hypervisor parameter validation.
8278

8279
  This function abstract the hypervisor parameter validation to be
8280
  used in both instance create and instance modify.
8281

8282
  @type lu: L{LogicalUnit}
8283
  @param lu: the logical unit for which we check
8284
  @type nodenames: list
8285
  @param nodenames: the list of nodes on which we should check
8286
  @type hvname: string
8287
  @param hvname: the name of the hypervisor we should use
8288
  @type hvparams: dict
8289
  @param hvparams: the parameters which we need to check
8290
  @raise errors.OpPrereqError: if the parameters are not valid
8291

8292
  """
8293
  nodenames = _FilterVmNodes(lu, nodenames)
8294

    
8295
  cluster = lu.cfg.GetClusterInfo()
8296
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8297

    
8298
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8299
  for node in nodenames:
8300
    info = hvinfo[node]
8301
    if info.offline:
8302
      continue
8303
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUInstanceCreate(LogicalUnit):
8337
  """Create an instance.
8338

8339
  """
8340
  HPATH = "instance-add"
8341
  HTYPE = constants.HTYPE_INSTANCE
8342
  REQ_BGL = False
8343

    
8344
  def CheckArguments(self):
8345
    """Check arguments.
8346

8347
    """
8348
    # do not require name_check to ease forward/backward compatibility
8349
    # for tools
8350
    if self.op.no_install and self.op.start:
8351
      self.LogInfo("No-installation mode selected, disabling startup")
8352
      self.op.start = False
8353
    # validate/normalize the instance name
8354
    self.op.instance_name = \
8355
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8356

    
8357
    if self.op.ip_check and not self.op.name_check:
8358
      # TODO: make the ip check more flexible and not depend on the name check
8359
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8360
                                 " check", errors.ECODE_INVAL)
8361

    
8362
    # check nics' parameter names
8363
    for nic in self.op.nics:
8364
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8365

    
8366
    # check disks. parameter names and consistent adopt/no-adopt strategy
8367
    has_adopt = has_no_adopt = False
8368
    for disk in self.op.disks:
8369
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8370
      if constants.IDISK_ADOPT in disk:
8371
        has_adopt = True
8372
      else:
8373
        has_no_adopt = True
8374
    if has_adopt and has_no_adopt:
8375
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8376
                                 errors.ECODE_INVAL)
8377
    if has_adopt:
8378
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8379
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8380
                                   " '%s' disk template" %
8381
                                   self.op.disk_template,
8382
                                   errors.ECODE_INVAL)
8383
      if self.op.iallocator is not None:
8384
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8385
                                   " iallocator script", errors.ECODE_INVAL)
8386
      if self.op.mode == constants.INSTANCE_IMPORT:
8387
        raise errors.OpPrereqError("Disk adoption not allowed for"
8388
                                   " instance import", errors.ECODE_INVAL)
8389
    else:
8390
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8391
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8392
                                   " but no 'adopt' parameter given" %
8393
                                   self.op.disk_template,
8394
                                   errors.ECODE_INVAL)
8395

    
8396
    self.adopt_disks = has_adopt
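    # Illustrative note (example values only, not validated data): with
    # adoption every disk dict carries constants.IDISK_ADOPT, e.g.
    #   {constants.IDISK_SIZE: 10240, constants.IDISK_ADOPT: "existing-lv"}
    # while a normal creation would pass e.g.
    #   {constants.IDISK_SIZE: 10240,
    #    constants.IDISK_MODE: constants.DISK_RDWR}
    # Mixing adopted and non-adopted disks in one request is rejected above.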
8397

    
8398
    # instance name verification
8399
    if self.op.name_check:
8400
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8401
      self.op.instance_name = self.hostname1.name
8402
      # used in CheckPrereq for ip ping check
8403
      self.check_ip = self.hostname1.ip
8404
    else:
8405
      self.check_ip = None
8406

    
8407
    # file storage checks
8408
    if (self.op.file_driver and
8409
        not self.op.file_driver in constants.FILE_DRIVER):
8410
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8411
                                 self.op.file_driver, errors.ECODE_INVAL)
8412

    
8413
    if self.op.disk_template == constants.DT_FILE:
8414
      opcodes.RequireFileStorage()
8415
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8416
      opcodes.RequireSharedFileStorage()
8417

    
8418
    ### Node/iallocator related checks
8419
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8420

    
8421
    if self.op.pnode is not None:
8422
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8423
        if self.op.snode is None:
8424
          raise errors.OpPrereqError("The networked disk templates need"
8425
                                     " a mirror node", errors.ECODE_INVAL)
8426
      elif self.op.snode:
8427
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8428
                        " template")
8429
        self.op.snode = None
8430

    
8431
    self._cds = _GetClusterDomainSecret()
8432

    
8433
    if self.op.mode == constants.INSTANCE_IMPORT:
8434
      # On import force_variant must be True, because if we forced it at
8435
      # initial install, our only chance when importing it back is that it
8436
      # works again!
8437
      self.op.force_variant = True
8438

    
8439
      if self.op.no_install:
8440
        self.LogInfo("No-installation mode has no effect during import")
8441

    
8442
    elif self.op.mode == constants.INSTANCE_CREATE:
8443
      if self.op.os_type is None:
8444
        raise errors.OpPrereqError("No guest OS specified",
8445
                                   errors.ECODE_INVAL)
8446
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8447
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8448
                                   " installation" % self.op.os_type,
8449
                                   errors.ECODE_STATE)
8450
      if self.op.disk_template is None:
8451
        raise errors.OpPrereqError("No disk template specified",
8452
                                   errors.ECODE_INVAL)
8453

    
8454
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8455
      # Check handshake to ensure both clusters have the same domain secret
8456
      src_handshake = self.op.source_handshake
8457
      if not src_handshake:
8458
        raise errors.OpPrereqError("Missing source handshake",
8459
                                   errors.ECODE_INVAL)
8460

    
8461
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8462
                                                           src_handshake)
8463
      if errmsg:
8464
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8465
                                   errors.ECODE_INVAL)
8466

    
8467
      # Load and check source CA
8468
      self.source_x509_ca_pem = self.op.source_x509_ca
8469
      if not self.source_x509_ca_pem:
8470
        raise errors.OpPrereqError("Missing source X509 CA",
8471
                                   errors.ECODE_INVAL)
8472

    
8473
      try:
8474
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8475
                                                    self._cds)
8476
      except OpenSSL.crypto.Error, err:
8477
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8478
                                   (err, ), errors.ECODE_INVAL)
8479

    
8480
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8481
      if errcode is not None:
8482
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8483
                                   errors.ECODE_INVAL)
8484

    
8485
      self.source_x509_ca = cert
8486

    
8487
      src_instance_name = self.op.source_instance_name
8488
      if not src_instance_name:
8489
        raise errors.OpPrereqError("Missing source instance name",
8490
                                   errors.ECODE_INVAL)
8491

    
8492
      self.source_instance_name = \
8493
          netutils.GetHostname(name=src_instance_name).name
8494

    
8495
    else:
8496
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8497
                                 self.op.mode, errors.ECODE_INVAL)
8498

    
8499
  def ExpandNames(self):
8500
    """ExpandNames for CreateInstance.
8501

8502
    Figure out the right locks for instance creation.
8503

8504
    """
8505
    self.needed_locks = {}
8506

    
8507
    instance_name = self.op.instance_name
8508
    # this is just a preventive check, but someone might still add this
8509
    # instance in the meantime, and creation will fail at lock-add time
8510
    if instance_name in self.cfg.GetInstanceList():
8511
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8512
                                 instance_name, errors.ECODE_EXISTS)
8513

    
8514
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8515

    
8516
    if self.op.iallocator:
8517
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8518
    else:
8519
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8520
      nodelist = [self.op.pnode]
8521
      if self.op.snode is not None:
8522
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8523
        nodelist.append(self.op.snode)
8524
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8525

    
8526
    # in case of import lock the source node too
8527
    if self.op.mode == constants.INSTANCE_IMPORT:
8528
      src_node = self.op.src_node
8529
      src_path = self.op.src_path
8530

    
8531
      if src_path is None:
8532
        self.op.src_path = src_path = self.op.instance_name
8533

    
8534
      if src_node is None:
8535
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8536
        self.op.src_node = None
8537
        if os.path.isabs(src_path):
8538
          raise errors.OpPrereqError("Importing an instance from a path"
8539
                                     " requires a source node option",
8540
                                     errors.ECODE_INVAL)
8541
      else:
8542
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8543
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8544
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8545
        if not os.path.isabs(src_path):
8546
          self.op.src_path = src_path = \
8547
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8548

    
8549
  def _RunAllocator(self):
8550
    """Run the allocator based on input opcode.
8551

8552
    """
8553
    nics = [n.ToDict() for n in self.nics]
8554
    ial = IAllocator(self.cfg, self.rpc,
8555
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8556
                     name=self.op.instance_name,
8557
                     disk_template=self.op.disk_template,
8558
                     tags=self.op.tags,
8559
                     os=self.op.os_type,
8560
                     vcpus=self.be_full[constants.BE_VCPUS],
8561
                     memory=self.be_full[constants.BE_MEMORY],
8562
                     disks=self.disks,
8563
                     nics=nics,
8564
                     hypervisor=self.op.hypervisor,
8565
                     )
8566

    
8567
    ial.Run(self.op.iallocator)
8568

    
8569
    if not ial.success:
8570
      raise errors.OpPrereqError("Can't compute nodes using"
8571
                                 " iallocator '%s': %s" %
8572
                                 (self.op.iallocator, ial.info),
8573
                                 errors.ECODE_NORES)
8574
    if len(ial.result) != ial.required_nodes:
8575
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8576
                                 " of nodes (%s), required %s" %
8577
                                 (self.op.iallocator, len(ial.result),
8578
                                  ial.required_nodes), errors.ECODE_FAULT)
8579
    self.op.pnode = ial.result[0]
8580
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8581
                 self.op.instance_name, self.op.iallocator,
8582
                 utils.CommaJoin(ial.result))
8583
    if ial.required_nodes == 2:
8584
      self.op.snode = ial.result[1]
8585

    
8586
  def BuildHooksEnv(self):
8587
    """Build hooks env.
8588

8589
    This runs on master, primary and secondary nodes of the instance.
8590

8591
    """
8592
    env = {
8593
      "ADD_MODE": self.op.mode,
8594
      }
8595
    if self.op.mode == constants.INSTANCE_IMPORT:
8596
      env["SRC_NODE"] = self.op.src_node
8597
      env["SRC_PATH"] = self.op.src_path
8598
      env["SRC_IMAGES"] = self.src_images
8599

    
8600
    env.update(_BuildInstanceHookEnv(
8601
      name=self.op.instance_name,
8602
      primary_node=self.op.pnode,
8603
      secondary_nodes=self.secondaries,
8604
      status=self.op.start,
8605
      os_type=self.op.os_type,
8606
      memory=self.be_full[constants.BE_MEMORY],
8607
      vcpus=self.be_full[constants.BE_VCPUS],
8608
      nics=_NICListToTuple(self, self.nics),
8609
      disk_template=self.op.disk_template,
8610
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8611
             for d in self.disks],
8612
      bep=self.be_full,
8613
      hvp=self.hv_full,
8614
      hypervisor_name=self.op.hypervisor,
8615
      tags=self.op.tags,
8616
    ))
8617

    
8618
    return env
8619

    
8620
  def BuildHooksNodes(self):
8621
    """Build hooks nodes.
8622

8623
    """
8624
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8625
    return nl, nl
8626

    
8627
  def _ReadExportInfo(self):
8628
    """Reads the export information from disk.
8629

8630
    It will override the opcode source node and path with the actual
8631
    information, if these two were not specified before.
8632

8633
    @return: the export information
8634

8635
    """
8636
    assert self.op.mode == constants.INSTANCE_IMPORT
8637

    
8638
    src_node = self.op.src_node
8639
    src_path = self.op.src_path
8640

    
8641
    if src_node is None:
8642
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8643
      exp_list = self.rpc.call_export_list(locked_nodes)
8644
      found = False
8645
      for node in exp_list:
8646
        if exp_list[node].fail_msg:
8647
          continue
8648
        if src_path in exp_list[node].payload:
8649
          found = True
8650
          self.op.src_node = src_node = node
8651
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8652
                                                       src_path)
8653
          break
8654
      if not found:
8655
        raise errors.OpPrereqError("No export found for relative path %s" %
8656
                                    src_path, errors.ECODE_INVAL)
8657

    
8658
    _CheckNodeOnline(self, src_node)
8659
    result = self.rpc.call_export_info(src_node, src_path)
8660
    result.Raise("No export or invalid export found in dir %s" % src_path)
8661

    
8662
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8663
    if not export_info.has_section(constants.INISECT_EXP):
8664
      raise errors.ProgrammerError("Corrupted export config",
8665
                                   errors.ECODE_ENVIRON)
8666

    
8667
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8668
    if (int(ei_version) != constants.EXPORT_VERSION):
8669
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8670
                                 (ei_version, constants.EXPORT_VERSION),
8671
                                 errors.ECODE_ENVIRON)
8672
    return export_info
8673

    
8674
  def _ReadExportParams(self, einfo):
8675
    """Use export parameters as defaults.
8676

8677
    In case the opcode doesn't specify (as in override) some instance
8678
    parameters, then try to use them from the export information, if
8679
    that declares them.
8680

8681
    """
8682
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8683

    
8684
    if self.op.disk_template is None:
8685
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8686
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8687
                                          "disk_template")
8688
        if self.op.disk_template not in constants.DISK_TEMPLATES:
8689
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" % " ".join(constants.DISK_TEMPLATES),
                                     errors.ECODE_INVAL)
8692
      else:
8693
        raise errors.OpPrereqError("No disk template specified and the export"
8694
                                   " is missing the disk_template information",
8695
                                   errors.ECODE_INVAL)
8696

    
8697
    if not self.op.disks:
8698
      disks = []
8699
      # TODO: import the disk iv_name too
8700
      for idx in range(constants.MAX_DISKS):
8701
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8702
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8703
          disks.append({constants.IDISK_SIZE: disk_sz})
8704
      self.op.disks = disks
8705
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
8706
        raise errors.OpPrereqError("No disk info specified and the export"
8707
                                   " is missing the disk information",
8708
                                   errors.ECODE_INVAL)
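    # Illustrative note: an export whose constants.INISECT_INS section holds,
    # say, disk0_size=10240 and disk1_size=5120 (sizes as written by the
    # exporting cluster, assumed to be MiB) would result in
    #   self.op.disks = [{constants.IDISK_SIZE: 10240},
    #                    {constants.IDISK_SIZE: 5120}]
    # unless the opcode already specified disks explicitly.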
8709

    
8710
    if not self.op.nics:
8711
      nics = []
8712
      for idx in range(constants.MAX_NICS):
8713
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8714
          ndict = {}
8715
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8716
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8717
            ndict[name] = v
8718
          nics.append(ndict)
8719
        else:
8720
          break
8721
      self.op.nics = nics
8722

    
8723
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8724
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8725

    
8726
    if (self.op.hypervisor is None and
8727
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8728
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8729

    
8730
    if einfo.has_section(constants.INISECT_HYP):
8731
      # use the export parameters but do not override the ones
8732
      # specified by the user
8733
      for name, value in einfo.items(constants.INISECT_HYP):
8734
        if name not in self.op.hvparams:
8735
          self.op.hvparams[name] = value
8736

    
8737
    if einfo.has_section(constants.INISECT_BEP):
8738
      # use the parameters, without overriding
8739
      for name, value in einfo.items(constants.INISECT_BEP):
8740
        if name not in self.op.beparams:
8741
          self.op.beparams[name] = value
8742
    else:
8743
      # try to read the parameters old style, from the main section
8744
      for name in constants.BES_PARAMETERS:
8745
        if (name not in self.op.beparams and
8746
            einfo.has_option(constants.INISECT_INS, name)):
8747
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8748

    
8749
    if einfo.has_section(constants.INISECT_OSP):
8750
      # use the parameters, without overriding
8751
      for name, value in einfo.items(constants.INISECT_OSP):
8752
        if name not in self.op.osparams:
8753
          self.op.osparams[name] = value
8754

    
8755
  def _RevertToDefaults(self, cluster):
8756
    """Revert the instance parameters to the default values.
8757

8758
    """
8759
    # hvparams
8760
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8761
    for name in self.op.hvparams.keys():
8762
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8763
        del self.op.hvparams[name]
8764
    # beparams
8765
    be_defs = cluster.SimpleFillBE({})
8766
    for name in self.op.beparams.keys():
8767
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8768
        del self.op.beparams[name]
8769
    # nic params
8770
    nic_defs = cluster.SimpleFillNIC({})
8771
    for nic in self.op.nics:
8772
      for name in constants.NICS_PARAMETERS:
8773
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8774
          del nic[name]
8775
    # osparams
8776
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8777
    for name in self.op.osparams.keys():
8778
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8779
        del self.op.osparams[name]
8780

    
8781
  def _CalculateFileStorageDir(self):
8782
    """Calculate final instance file storage dir.
8783

8784
    """
8785
    # file storage dir calculation/check
8786
    self.instance_file_storage_dir = None
8787
    if self.op.disk_template in constants.DTS_FILEBASED:
8788
      # build the full file storage dir path
8789
      joinargs = []
8790

    
8791
      if self.op.disk_template == constants.DT_SHARED_FILE:
8792
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8793
      else:
8794
        get_fsd_fn = self.cfg.GetFileStorageDir
8795

    
8796
      cfg_storagedir = get_fsd_fn()
8797
      if not cfg_storagedir:
8798
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8799
      joinargs.append(cfg_storagedir)
8800

    
8801
      if self.op.file_storage_dir is not None:
8802
        joinargs.append(self.op.file_storage_dir)
8803

    
8804
      joinargs.append(self.op.instance_name)
8805

    
8806
      # pylint: disable=W0142
8807
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
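      # Illustrative note (hypothetical paths): with a cluster file storage
      # dir of "/srv/ganeti/file-storage", an opcode file_storage_dir of
      # "mydir" and an instance named "inst1.example.com", the join above
      # yields "/srv/ganeti/file-storage/mydir/inst1.example.com".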
8808

    
8809
  def CheckPrereq(self):
8810
    """Check prerequisites.
8811

8812
    """
8813
    self._CalculateFileStorageDir()
8814

    
8815
    if self.op.mode == constants.INSTANCE_IMPORT:
8816
      export_info = self._ReadExportInfo()
8817
      self._ReadExportParams(export_info)
8818

    
8819
    if (not self.cfg.GetVGName() and
8820
        self.op.disk_template not in constants.DTS_NOT_LVM):
8821
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8822
                                 " instances", errors.ECODE_STATE)
8823

    
8824
    if (self.op.hypervisor is None or
8825
        self.op.hypervisor == constants.VALUE_AUTO):
8826
      self.op.hypervisor = self.cfg.GetHypervisorType()
8827

    
8828
    cluster = self.cfg.GetClusterInfo()
8829
    enabled_hvs = cluster.enabled_hypervisors
8830
    if self.op.hypervisor not in enabled_hvs:
8831
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8832
                                 " cluster (%s)" % (self.op.hypervisor,
8833
                                  ",".join(enabled_hvs)),
8834
                                 errors.ECODE_STATE)
8835

    
8836
    # Check tag validity
8837
    for tag in self.op.tags:
8838
      objects.TaggableObject.ValidateTag(tag)
8839

    
8840
    # check hypervisor parameter syntax (locally)
8841
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8842
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8843
                                      self.op.hvparams)
8844
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8845
    hv_type.CheckParameterSyntax(filled_hvp)
8846
    self.hv_full = filled_hvp
8847
    # check that we don't specify global parameters on an instance
8848
    _CheckGlobalHvParams(self.op.hvparams)
8849

    
8850
    # fill and remember the beparams dict
8851
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
8852
    for param, value in self.op.beparams.iteritems():
8853
      if value == constants.VALUE_AUTO:
8854
        self.op.beparams[param] = default_beparams[param]
8855
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8856
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8857

    
8858
    # build os parameters
8859
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8860

    
8861
    # now that hvp/bep are in final format, let's reset to defaults,
8862
    # if told to do so
8863
    if self.op.identify_defaults:
8864
      self._RevertToDefaults(cluster)
8865

    
8866
    # NIC buildup
8867
    self.nics = []
8868
    for idx, nic in enumerate(self.op.nics):
8869
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8870
      nic_mode = nic_mode_req
8871
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8872
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8873

    
8874
      # in routed mode, for the first nic, the default ip is 'auto'
8875
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8876
        default_ip_mode = constants.VALUE_AUTO
8877
      else:
8878
        default_ip_mode = constants.VALUE_NONE
8879

    
8880
      # ip validity checks
8881
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8882
      if ip is None or ip.lower() == constants.VALUE_NONE:
8883
        nic_ip = None
8884
      elif ip.lower() == constants.VALUE_AUTO:
8885
        if not self.op.name_check:
8886
          raise errors.OpPrereqError("IP address set to auto but name checks"
8887
                                     " have been skipped",
8888
                                     errors.ECODE_INVAL)
8889
        nic_ip = self.hostname1.ip
8890
      else:
8891
        if not netutils.IPAddress.IsValid(ip):
8892
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8893
                                     errors.ECODE_INVAL)
8894
        nic_ip = ip
8895

    
8896
      # TODO: check the ip address for uniqueness
8897
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8898
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8899
                                   errors.ECODE_INVAL)
8900

    
8901
      # MAC address verification
8902
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8903
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8904
        mac = utils.NormalizeAndValidateMac(mac)
8905

    
8906
        try:
8907
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8908
        except errors.ReservationError:
8909
          raise errors.OpPrereqError("MAC address %s already in use"
8910
                                     " in cluster" % mac,
8911
                                     errors.ECODE_NOTUNIQUE)
8912

    
8913
      #  Build nic parameters
8914
      link = nic.get(constants.INIC_LINK, None)
8915
      if link == constants.VALUE_AUTO:
8916
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
8917
      nicparams = {}
8918
      if nic_mode_req:
8919
        nicparams[constants.NIC_MODE] = nic_mode
8920
      if link:
8921
        nicparams[constants.NIC_LINK] = link
8922

    
8923
      check_params = cluster.SimpleFillNIC(nicparams)
8924
      objects.NIC.CheckParameterSyntax(check_params)
8925
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8926

    
8927
    # disk checks/pre-build
8928
    default_vg = self.cfg.GetVGName()
8929
    self.disks = []
8930
    for disk in self.op.disks:
8931
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8932
      if mode not in constants.DISK_ACCESS_SET:
8933
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8934
                                   mode, errors.ECODE_INVAL)
8935
      size = disk.get(constants.IDISK_SIZE, None)
8936
      if size is None:
8937
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8938
      try:
8939
        size = int(size)
8940
      except (TypeError, ValueError):
8941
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8942
                                   errors.ECODE_INVAL)
8943

    
8944
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8945
      new_disk = {
8946
        constants.IDISK_SIZE: size,
8947
        constants.IDISK_MODE: mode,
8948
        constants.IDISK_VG: data_vg,
8949
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8950
        }
8951
      if constants.IDISK_ADOPT in disk:
8952
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8953
      self.disks.append(new_disk)
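    # Illustrative note (example values only): an opcode disk spec of
    # {constants.IDISK_SIZE: "10240"} on a cluster whose default VG is
    # "xenvg" would end up in self.disks as
    #   {constants.IDISK_SIZE: 10240,
    #    constants.IDISK_MODE: constants.DISK_RDWR,
    #    constants.IDISK_VG: "xenvg",
    #    constants.IDISK_METAVG: "xenvg"}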
8954

    
8955
    if self.op.mode == constants.INSTANCE_IMPORT:
8956
      disk_images = []
8957
      for idx in range(len(self.disks)):
8958
        option = "disk%d_dump" % idx
8959
        if export_info.has_option(constants.INISECT_INS, option):
8960
          # FIXME: are the old os-es, disk sizes, etc. useful?
8961
          export_name = export_info.get(constants.INISECT_INS, option)
8962
          image = utils.PathJoin(self.op.src_path, export_name)
8963
          disk_images.append(image)
8964
        else:
8965
          disk_images.append(False)
8966

    
8967
      self.src_images = disk_images
8968

    
8969
      old_name = export_info.get(constants.INISECT_INS, "name")
8970
      if self.op.instance_name == old_name:
8971
        for idx, nic in enumerate(self.nics):
8972
          if nic.mac == constants.VALUE_AUTO:
8973
            nic_mac_ini = "nic%d_mac" % idx
8974
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8975

    
8976
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8977

    
8978
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8979
    if self.op.ip_check:
8980
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8981
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8982
                                   (self.check_ip, self.op.instance_name),
8983
                                   errors.ECODE_NOTUNIQUE)
8984

    
8985
    #### mac address generation
8986
    # By generating here the mac address both the allocator and the hooks get
8987
    # the real final mac address rather than the 'auto' or 'generate' value.
8988
    # There is a race condition between the generation and the instance object
8989
    # creation, which means that we know the mac is valid now, but we're not
8990
    # sure it will be when we actually add the instance. If things go bad
8991
    # adding the instance will abort because of a duplicate mac, and the
8992
    # creation job will fail.
8993
    for nic in self.nics:
8994
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8995
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8996

    
8997
    #### allocator run
8998

    
8999
    if self.op.iallocator is not None:
9000
      self._RunAllocator()
9001

    
9002
    #### node related checks
9003

    
9004
    # check primary node
9005
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9006
    assert self.pnode is not None, \
9007
      "Cannot retrieve locked node %s" % self.op.pnode
9008
    if pnode.offline:
9009
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9010
                                 pnode.name, errors.ECODE_STATE)
9011
    if pnode.drained:
9012
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9013
                                 pnode.name, errors.ECODE_STATE)
9014
    if not pnode.vm_capable:
9015
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9016
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
9017

    
9018
    self.secondaries = []
9019

    
9020
    # mirror node verification
9021
    if self.op.disk_template in constants.DTS_INT_MIRROR:
9022
      if self.op.snode == pnode.name:
9023
        raise errors.OpPrereqError("The secondary node cannot be the"
9024
                                   " primary node", errors.ECODE_INVAL)
9025
      _CheckNodeOnline(self, self.op.snode)
9026
      _CheckNodeNotDrained(self, self.op.snode)
9027
      _CheckNodeVmCapable(self, self.op.snode)
9028
      self.secondaries.append(self.op.snode)
9029

    
9030
    nodenames = [pnode.name] + self.secondaries
9031

    
9032
    if not self.adopt_disks:
9033
      # Check lv size requirements, if not adopting
9034
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9035
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9036

    
9037
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9038
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9039
                                disk[constants.IDISK_ADOPT])
9040
                     for disk in self.disks])
9041
      if len(all_lvs) != len(self.disks):
9042
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
9043
                                   errors.ECODE_INVAL)
9044
      for lv_name in all_lvs:
9045
        try:
9046
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9047
          # to ReserveLV uses the same syntax
9048
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9049
        except errors.ReservationError:
9050
          raise errors.OpPrereqError("LV named %s used by another instance" %
9051
                                     lv_name, errors.ECODE_NOTUNIQUE)
9052

    
9053
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9054
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9055

    
9056
      node_lvs = self.rpc.call_lv_list([pnode.name],
9057
                                       vg_names.payload.keys())[pnode.name]
9058
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9059
      node_lvs = node_lvs.payload
9060

    
9061
      delta = all_lvs.difference(node_lvs.keys())
9062
      if delta:
9063
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
9064
                                   utils.CommaJoin(delta),
9065
                                   errors.ECODE_INVAL)
9066
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9067
      if online_lvs:
9068
        raise errors.OpPrereqError("Online logical volumes found, cannot"
9069
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
9070
                                   errors.ECODE_STATE)
9071
      # update the size of disk based on what is found
9072
      for dsk in self.disks:
9073
        dsk[constants.IDISK_SIZE] = \
9074
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9075
                                        dsk[constants.IDISK_ADOPT])][0]))
9076

    
9077
    elif self.op.disk_template == constants.DT_BLOCK:
9078
      # Normalize and de-duplicate device paths
9079
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9080
                       for disk in self.disks])
9081
      if len(all_disks) != len(self.disks):
9082
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
9083
                                   errors.ECODE_INVAL)
9084
      baddisks = [d for d in all_disks
9085
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9086
      if baddisks:
9087
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9088
                                   " cannot be adopted" %
9089
                                   (", ".join(baddisks),
9090
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
9091
                                   errors.ECODE_INVAL)
9092

    
9093
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
9094
                                            list(all_disks))[pnode.name]
9095
      node_disks.Raise("Cannot get block device information from node %s" %
9096
                       pnode.name)
9097
      node_disks = node_disks.payload
9098
      delta = all_disks.difference(node_disks.keys())
9099
      if delta:
9100
        raise errors.OpPrereqError("Missing block device(s): %s" %
9101
                                   utils.CommaJoin(delta),
9102
                                   errors.ECODE_INVAL)
9103
      for dsk in self.disks:
9104
        dsk[constants.IDISK_SIZE] = \
9105
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9106

    
9107
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9108

    
9109
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9110
    # check OS parameters (remotely)
9111
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9112

    
9113
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9114

    
9115
    # memory check on primary node
9116
    if self.op.start:
9117
      _CheckNodeFreeMemory(self, self.pnode.name,
9118
                           "creating instance %s" % self.op.instance_name,
9119
                           self.be_full[constants.BE_MEMORY],
9120
                           self.op.hypervisor)
9121

    
9122
    self.dry_run_result = list(nodenames)
9123

    
9124
  def Exec(self, feedback_fn):
9125
    """Create and add the instance to the cluster.
9126

9127
    """
9128
    instance = self.op.instance_name
9129
    pnode_name = self.pnode.name
9130

    
9131
    ht_kind = self.op.hypervisor
9132
    if ht_kind in constants.HTS_REQ_PORT:
9133
      network_port = self.cfg.AllocatePort()
9134
    else:
9135
      network_port = None
9136

    
9137
    disks = _GenerateDiskTemplate(self,
9138
                                  self.op.disk_template,
9139
                                  instance, pnode_name,
9140
                                  self.secondaries,
9141
                                  self.disks,
9142
                                  self.instance_file_storage_dir,
9143
                                  self.op.file_driver,
9144
                                  0,
9145
                                  feedback_fn)
9146

    
9147
    iobj = objects.Instance(name=instance, os=self.op.os_type,
9148
                            primary_node=pnode_name,
9149
                            nics=self.nics, disks=disks,
9150
                            disk_template=self.op.disk_template,
9151
                            admin_up=False,
9152
                            network_port=network_port,
9153
                            beparams=self.op.beparams,
9154
                            hvparams=self.op.hvparams,
9155
                            hypervisor=self.op.hypervisor,
9156
                            osparams=self.op.osparams,
9157
                            )
9158

    
9159
    if self.op.tags:
9160
      for tag in self.op.tags:
9161
        iobj.AddTag(tag)
9162

    
9163
    if self.adopt_disks:
9164
      if self.op.disk_template == constants.DT_PLAIN:
9165
        # rename LVs to the newly-generated names; we need to construct
9166
        # 'fake' LV disks with the old data, plus the new unique_id
9167
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9168
        rename_to = []
9169
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9170
          rename_to.append(t_dsk.logical_id)
9171
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9172
          self.cfg.SetDiskID(t_dsk, pnode_name)
9173
        result = self.rpc.call_blockdev_rename(pnode_name,
9174
                                               zip(tmp_disks, rename_to))
9175
        result.Raise("Failed to rename adopted LVs")
9176
    else:
9177
      feedback_fn("* creating instance disks...")
9178
      try:
9179
        _CreateDisks(self, iobj)
9180
      except errors.OpExecError:
9181
        self.LogWarning("Device creation failed, reverting...")
9182
        try:
9183
          _RemoveDisks(self, iobj)
9184
        finally:
9185
          self.cfg.ReleaseDRBDMinors(instance)
9186
          raise
9187

    
9188
    feedback_fn("adding instance %s to cluster config" % instance)
9189

    
9190
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9191

    
9192
    # Declare that we don't want to remove the instance lock anymore, as we've
9193
    # added the instance to the config
9194
    del self.remove_locks[locking.LEVEL_INSTANCE]
9195

    
9196
    if self.op.mode == constants.INSTANCE_IMPORT:
9197
      # Release unused nodes
9198
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9199
    else:
9200
      # Release all nodes
9201
      _ReleaseLocks(self, locking.LEVEL_NODE)
9202

    
9203
    disk_abort = False
9204
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9205
      feedback_fn("* wiping instance disks...")
9206
      try:
9207
        _WipeDisks(self, iobj)
9208
      except errors.OpExecError, err:
9209
        logging.exception("Wiping disks failed")
9210
        self.LogWarning("Wiping instance disks failed (%s)", err)
9211
        disk_abort = True
9212

    
9213
    if disk_abort:
9214
      # Something is already wrong with the disks, don't do anything else
9215
      pass
9216
    elif self.op.wait_for_sync:
9217
      disk_abort = not _WaitForSync(self, iobj)
9218
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9219
      # make sure the disks are not degraded (still sync-ing is ok)
9220
      feedback_fn("* checking mirrors status")
9221
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9222
    else:
9223
      disk_abort = False
9224

    
9225
    if disk_abort:
9226
      _RemoveDisks(self, iobj)
9227
      self.cfg.RemoveInstance(iobj.name)
9228
      # Make sure the instance lock gets removed
9229
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9230
      raise errors.OpExecError("There are some degraded disks for"
9231
                               " this instance")
9232

    
9233
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9234
      if self.op.mode == constants.INSTANCE_CREATE:
9235
        if not self.op.no_install:
9236
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9237
                        not self.op.wait_for_sync)
9238
          if pause_sync:
9239
            feedback_fn("* pausing disk sync to install instance OS")
9240
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9241
                                                              iobj.disks, True)
9242
            for idx, success in enumerate(result.payload):
9243
              if not success:
9244
                logging.warn("pause-sync of instance %s for disk %d failed",
9245
                             instance, idx)
9246

    
9247
          feedback_fn("* running the instance OS create scripts...")
9248
          # FIXME: pass debug option from opcode to backend
9249
          os_add_result = \
9250
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9251
                                          self.op.debug_level)
9252
          if pause_sync:
9253
            feedback_fn("* resuming disk sync")
9254
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9255
                                                              iobj.disks, False)
9256
            for idx, success in enumerate(result.payload):
9257
              if not success:
9258
                logging.warn("resume-sync of instance %s for disk %d failed",
9259
                             instance, idx)
9260

    
9261
          os_add_result.Raise("Could not add os for instance %s"
9262
                              " on node %s" % (instance, pnode_name))
9263

    
9264
      elif self.op.mode == constants.INSTANCE_IMPORT:
9265
        feedback_fn("* running the instance OS import scripts...")
9266

    
9267
        transfers = []
9268

    
9269
        for idx, image in enumerate(self.src_images):
9270
          if not image:
9271
            continue
9272

    
9273
          # FIXME: pass debug option from opcode to backend
9274
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9275
                                             constants.IEIO_FILE, (image, ),
9276
                                             constants.IEIO_SCRIPT,
9277
                                             (iobj.disks[idx], idx),
9278
                                             None)
9279
          transfers.append(dt)
9280

    
9281
        import_result = \
9282
          masterd.instance.TransferInstanceData(self, feedback_fn,
9283
                                                self.op.src_node, pnode_name,
9284
                                                self.pnode.secondary_ip,
9285
                                                iobj, transfers)
9286
        if not compat.all(import_result):
9287
          self.LogWarning("Some disks for instance %s on node %s were not"
9288
                          " imported successfully" % (instance, pnode_name))
9289

    
9290
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9291
        feedback_fn("* preparing remote import...")
9292
        # The source cluster will stop the instance before attempting to make a
9293
        # connection. In some cases stopping an instance can take a long time,
9294
        # hence the shutdown timeout is added to the connection timeout.
9295
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9296
                           self.op.source_shutdown_timeout)
9297
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9298

    
9299
        assert iobj.primary_node == self.pnode.name
9300
        disk_results = \
9301
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9302
                                        self.source_x509_ca,
9303
                                        self._cds, timeouts)
9304
        if not compat.all(disk_results):
9305
          # TODO: Should the instance still be started, even if some disks
9306
          # failed to import (valid for local imports, too)?
9307
          self.LogWarning("Some disks for instance %s on node %s were not"
9308
                          " imported successfully" % (instance, pnode_name))
9309

    
9310
        # Run rename script on newly imported instance
9311
        assert iobj.name == instance
9312
        feedback_fn("Running rename script for %s" % instance)
9313
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9314
                                                   self.source_instance_name,
9315
                                                   self.op.debug_level)
9316
        if result.fail_msg:
9317
          self.LogWarning("Failed to run rename script for %s on node"
9318
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9319

    
9320
      else:
9321
        # also checked in the prereq part
9322
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9323
                                     % self.op.mode)
9324

    
9325
    if self.op.start:
9326
      iobj.admin_up = True
9327
      self.cfg.Update(iobj, feedback_fn)
9328
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9329
      feedback_fn("* starting instance...")
9330
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9331
                                            False)
9332
      result.Raise("Could not start instance")
9333

    
9334
    return list(iobj.all_nodes)
9335

    
9336

    
9337
class LUInstanceConsole(NoHooksLU):
9338
  """Connect to an instance's console.
9339

9340
  This is somewhat special in that it returns the command line that
9341
  you need to run on the master node in order to connect to the
9342
  console.
9343

9344
  """
9345
  REQ_BGL = False
9346

    
9347
  def ExpandNames(self):
9348
    self._ExpandAndLockInstance()
9349

    
9350
  def CheckPrereq(self):
9351
    """Check prerequisites.
9352

9353
    This checks that the instance is in the cluster.
9354

9355
    """
9356
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9357
    assert self.instance is not None, \
9358
      "Cannot retrieve locked instance %s" % self.op.instance_name
9359
    _CheckNodeOnline(self, self.instance.primary_node)
9360

    
9361
  def Exec(self, feedback_fn):
9362
    """Connect to the console of an instance
9363

9364
    """
9365
    instance = self.instance
9366
    node = instance.primary_node
9367

    
9368
    node_insts = self.rpc.call_instance_list([node],
9369
                                             [instance.hypervisor])[node]
9370
    node_insts.Raise("Can't get node information from %s" % node)
9371

    
9372
    if instance.name not in node_insts.payload:
9373
      if instance.admin_up:
9374
        state = constants.INSTST_ERRORDOWN
9375
      else:
9376
        state = constants.INSTST_ADMINDOWN
9377
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9378
                               (instance.name, state))
9379

    
9380
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9381

    
9382
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9383

    
9384

    
9385
def _GetInstanceConsole(cluster, instance):
9386
  """Returns console information for an instance.
9387

9388
  @type cluster: L{objects.Cluster}
9389
  @type instance: L{objects.Instance}
9390
  @rtype: dict
9391

9392
  """
9393
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9394
  # beparams and hvparams are passed separately, to avoid editing the
9395
  # instance and then saving the defaults in the instance itself.
9396
  hvparams = cluster.FillHV(instance)
9397
  beparams = cluster.FillBE(instance)
9398
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9399

    
9400
  assert console.instance == instance.name
9401
  assert console.Validate()
9402

    
9403
  return console.ToDict()
9404

    
9405

    
9406
class LUInstanceReplaceDisks(LogicalUnit):
9407
  """Replace the disks of an instance.
9408

9409
  """
9410
  HPATH = "mirrors-replace"
9411
  HTYPE = constants.HTYPE_INSTANCE
9412
  REQ_BGL = False
9413

    
9414
  def CheckArguments(self):
9415
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9416
                                  self.op.iallocator)
9417

    
9418
  def ExpandNames(self):
9419
    self._ExpandAndLockInstance()
9420

    
9421
    assert locking.LEVEL_NODE not in self.needed_locks
9422
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9423

    
9424
    assert self.op.iallocator is None or self.op.remote_node is None, \
9425
      "Conflicting options"
9426

    
9427
    if self.op.remote_node is not None:
9428
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9429

    
9430
      # Warning: do not remove the locking of the new secondary here
9431
      # unless DRBD8.AddChildren is changed to work in parallel;
9432
      # currently it doesn't since parallel invocations of
9433
      # FindUnusedMinor will conflict
9434
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9435
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9436
    else:
9437
      self.needed_locks[locking.LEVEL_NODE] = []
9438
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9439

    
9440
      if self.op.iallocator is not None:
9441
        # iallocator will select a new node in the same group
9442
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9443

    
9444
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9445
                                   self.op.iallocator, self.op.remote_node,
9446
                                   self.op.disks, False, self.op.early_release)
9447

    
9448
    self.tasklets = [self.replacer]
9449

    
9450
  def DeclareLocks(self, level):
9451
    if level == locking.LEVEL_NODEGROUP:
9452
      assert self.op.remote_node is None
9453
      assert self.op.iallocator is not None
9454
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9455

    
9456
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9457
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9458
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9459

    
9460
    elif level == locking.LEVEL_NODE:
9461
      if self.op.iallocator is not None:
9462
        assert self.op.remote_node is None
9463
        assert not self.needed_locks[locking.LEVEL_NODE]
9464

    
9465
        # Lock member nodes of all locked groups
9466
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9467
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9468
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9469
      else:
9470
        self._LockInstancesNodes()
9471

    
9472
  def BuildHooksEnv(self):
9473
    """Build hooks env.
9474

9475
    This runs on the master, the primary and all the secondaries.
9476

9477
    """
9478
    instance = self.replacer.instance
9479
    env = {
9480
      "MODE": self.op.mode,
9481
      "NEW_SECONDARY": self.op.remote_node,
9482
      "OLD_SECONDARY": instance.secondary_nodes[0],
9483
      }
9484
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9485
    return env
9486

    
9487
  def BuildHooksNodes(self):
9488
    """Build hooks nodes.
9489

9490
    """
9491
    instance = self.replacer.instance
9492
    nl = [
9493
      self.cfg.GetMasterNode(),
9494
      instance.primary_node,
9495
      ]
9496
    if self.op.remote_node is not None:
9497
      nl.append(self.op.remote_node)
9498
    return nl, nl
9499

    
9500
  def CheckPrereq(self):
9501
    """Check prerequisites.
9502

9503
    """
9504
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9505
            self.op.iallocator is None)
9506

    
9507
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9508
    if owned_groups:
9509
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9510

    
9511
    return LogicalUnit.CheckPrereq(self)
9512

    
9513

    
9514
class TLReplaceDisks(Tasklet):
9515
  """Replaces disks for an instance.
9516

9517
  Note: Locking is not within the scope of this class.
9518

9519
  """
9520
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9521
               disks, delay_iallocator, early_release):
9522
    """Initializes this class.
9523

9524
    """
9525
    Tasklet.__init__(self, lu)
9526

    
9527
    # Parameters
9528
    self.instance_name = instance_name
9529
    self.mode = mode
9530
    self.iallocator_name = iallocator_name
9531
    self.remote_node = remote_node
9532
    self.disks = disks
9533
    self.delay_iallocator = delay_iallocator
9534
    self.early_release = early_release
9535

    
9536
    # Runtime data
9537
    self.instance = None
9538
    self.new_node = None
9539
    self.target_node = None
9540
    self.other_node = None
9541
    self.remote_node_info = None
9542
    self.node_secondary_ip = None
9543

    
9544
  @staticmethod
9545
  def CheckArguments(mode, remote_node, iallocator):
9546
    """Helper function for users of this class.
9547

9548
    """
9549
    # check for valid parameter combination
9550
    if mode == constants.REPLACE_DISK_CHG:
9551
      if remote_node is None and iallocator is None:
9552
        raise errors.OpPrereqError("When changing the secondary either an"
9553
                                   " iallocator script must be used or the"
9554
                                   " new node given", errors.ECODE_INVAL)
9555

    
9556
      if remote_node is not None and iallocator is not None:
9557
        raise errors.OpPrereqError("Give either the iallocator or the new"
9558
                                   " secondary, not both", errors.ECODE_INVAL)
9559

    
9560
    elif remote_node is not None or iallocator is not None:
9561
      # Not replacing the secondary
9562
      raise errors.OpPrereqError("The iallocator and new node options can"
9563
                                 " only be used when changing the"
9564
                                 " secondary node", errors.ECODE_INVAL)
9565

    
9566
  @staticmethod
9567
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9568
    """Compute a new secondary node using an IAllocator.
9569

9570
    """
9571
    ial = IAllocator(lu.cfg, lu.rpc,
9572
                     mode=constants.IALLOCATOR_MODE_RELOC,
9573
                     name=instance_name,
9574
                     relocate_from=list(relocate_from))
9575

    
9576
    ial.Run(iallocator_name)
9577

    
9578
    if not ial.success:
9579
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9580
                                 " %s" % (iallocator_name, ial.info),
9581
                                 errors.ECODE_NORES)
9582

    
9583
    if len(ial.result) != ial.required_nodes:
9584
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9585
                                 " of nodes (%s), required %s" %
9586
                                 (iallocator_name,
9587
                                  len(ial.result), ial.required_nodes),
9588
                                 errors.ECODE_FAULT)
9589

    
9590
    remote_node_name = ial.result[0]
9591

    
9592
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9593
               instance_name, remote_node_name)
9594

    
9595
    return remote_node_name
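  # Added, illustrative note: for IALLOCATOR_MODE_RELOC the allocator is
  # expected to return exactly one node, e.g. (hypothetical values)
  #
  #   ial.success        -> True
  #   ial.required_nodes -> 1
  #   ial.result         -> ["node3.example.com"]
  #
  # in which case this helper logs the choice and returns
  # "node3.example.com" as the new secondary.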
9596

    
9597
  def _FindFaultyDisks(self, node_name):
9598
    """Wrapper for L{_FindFaultyInstanceDisks}.
9599

9600
    """
9601
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9602
                                    node_name, True)
9603

    
9604
  def _CheckDisksActivated(self, instance):
9605
    """Checks if the instance disks are activated.
9606

9607
    @param instance: The instance whose disks should be checked
9608
    @return: True if they are activated, False otherwise
9609

9610
    """
9611
    nodes = instance.all_nodes
9612

    
9613
    for idx, dev in enumerate(instance.disks):
9614
      for node in nodes:
9615
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9616
        self.cfg.SetDiskID(dev, node)
9617

    
9618
        result = self.rpc.call_blockdev_find(node, dev)
9619

    
9620
        if result.offline:
9621
          continue
9622
        elif result.fail_msg or not result.payload:
9623
          return False
9624

    
9625
    return True
9626

    
9627
  def CheckPrereq(self):
9628
    """Check prerequisites.
9629

9630
    This checks that the instance is in the cluster.
9631

9632
    """
9633
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9634
    assert instance is not None, \
9635
      "Cannot retrieve locked instance %s" % self.instance_name
9636

    
9637
    if instance.disk_template != constants.DT_DRBD8:
9638
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9639
                                 " instances", errors.ECODE_INVAL)
9640

    
9641
    if len(instance.secondary_nodes) != 1:
9642
      raise errors.OpPrereqError("The instance has a strange layout,"
9643
                                 " expected one secondary but found %d" %
9644
                                 len(instance.secondary_nodes),
9645
                                 errors.ECODE_FAULT)
9646

    
9647
    if not self.delay_iallocator:
9648
      self._CheckPrereq2()
9649

    
9650
  def _CheckPrereq2(self):
9651
    """Check prerequisites, second part.
9652

9653
    This function should always be part of CheckPrereq. It was separated out
9654
    and is now called from Exec because, during node evacuation, the
9655
    iallocator would otherwise only be called with an unmodified cluster
9656
    model, not taking planned changes into account.
9657

9658
    """
9659
    instance = self.instance
9660
    secondary_node = instance.secondary_nodes[0]
9661

    
9662
    if self.iallocator_name is None:
9663
      remote_node = self.remote_node
9664
    else:
9665
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9666
                                       instance.name, instance.secondary_nodes)
9667

    
9668
    if remote_node is None:
9669
      self.remote_node_info = None
9670
    else:
9671
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9672
             "Remote node '%s' is not locked" % remote_node
9673

    
9674
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9675
      assert self.remote_node_info is not None, \
9676
        "Cannot retrieve locked node %s" % remote_node
9677

    
9678
    if remote_node == self.instance.primary_node:
9679
      raise errors.OpPrereqError("The specified node is the primary node of"
9680
                                 " the instance", errors.ECODE_INVAL)
9681

    
9682
    if remote_node == secondary_node:
9683
      raise errors.OpPrereqError("The specified node is already the"
9684
                                 " secondary node of the instance",
9685
                                 errors.ECODE_INVAL)
9686

    
9687
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9688
                                    constants.REPLACE_DISK_CHG):
9689
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9690
                                 errors.ECODE_INVAL)
9691

    
9692
    if self.mode == constants.REPLACE_DISK_AUTO:
9693
      if not self._CheckDisksActivated(instance):
9694
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9695
                                   " first" % self.instance_name,
9696
                                   errors.ECODE_STATE)
9697
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9698
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9699

    
9700
      if faulty_primary and faulty_secondary:
9701
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9702
                                   " one node and can not be repaired"
9703
                                   " automatically" % self.instance_name,
9704
                                   errors.ECODE_STATE)
9705

    
9706
      if faulty_primary:
9707
        self.disks = faulty_primary
9708
        self.target_node = instance.primary_node
9709
        self.other_node = secondary_node
9710
        check_nodes = [self.target_node, self.other_node]
9711
      elif faulty_secondary:
9712
        self.disks = faulty_secondary
9713
        self.target_node = secondary_node
9714
        self.other_node = instance.primary_node
9715
        check_nodes = [self.target_node, self.other_node]
9716
      else:
9717
        self.disks = []
9718
        check_nodes = []
9719

    
9720
    else:
9721
      # Non-automatic modes
9722
      if self.mode == constants.REPLACE_DISK_PRI:
9723
        self.target_node = instance.primary_node
9724
        self.other_node = secondary_node
9725
        check_nodes = [self.target_node, self.other_node]
9726

    
9727
      elif self.mode == constants.REPLACE_DISK_SEC:
9728
        self.target_node = secondary_node
9729
        self.other_node = instance.primary_node
9730
        check_nodes = [self.target_node, self.other_node]
9731

    
9732
      elif self.mode == constants.REPLACE_DISK_CHG:
9733
        self.new_node = remote_node
9734
        self.other_node = instance.primary_node
9735
        self.target_node = secondary_node
9736
        check_nodes = [self.new_node, self.other_node]
9737

    
9738
        _CheckNodeNotDrained(self.lu, remote_node)
9739
        _CheckNodeVmCapable(self.lu, remote_node)
9740

    
9741
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9742
        assert old_node_info is not None
9743
        if old_node_info.offline and not self.early_release:
9744
          # doesn't make sense to delay the release
9745
          self.early_release = True
9746
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9747
                          " early-release mode", secondary_node)
9748

    
9749
      else:
9750
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9751
                                     self.mode)
9752

    
9753
      # If not specified all disks should be replaced
9754
      if not self.disks:
9755
        self.disks = range(len(self.instance.disks))
9756

    
9757
    for node in check_nodes:
9758
      _CheckNodeOnline(self.lu, node)
9759

    
9760
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9761
                                                          self.other_node,
9762
                                                          self.target_node]
9763
                              if node_name is not None)
9764

    
9765
    # Release unneeded node locks
9766
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9767

    
9768
    # Release any owned node group
9769
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9770
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9771

    
9772
    # Check whether disks are valid
9773
    for disk_idx in self.disks:
9774
      instance.FindDisk(disk_idx)
9775

    
9776
    # Get secondary node IP addresses
9777
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9778
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
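  # Added summary comment (not part of the original source) of the node
  # roles computed above, per replacement mode:
  #
  #   mode              target_node           other_node       new_node
  #   REPLACE_DISK_PRI  primary               secondary        -
  #   REPLACE_DISK_SEC  secondary             primary          -
  #   REPLACE_DISK_AUTO node with faulty LVs  the healthy one  -
  #   REPLACE_DISK_CHG  old secondary         primary          chosen node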
9779

    
9780
  def Exec(self, feedback_fn):
9781
    """Execute disk replacement.
9782

9783
    This dispatches the disk replacement to the appropriate handler.
9784

9785
    """
9786
    if self.delay_iallocator:
9787
      self._CheckPrereq2()
9788

    
9789
    if __debug__:
9790
      # Verify owned locks before starting operation
9791
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9792
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9793
          ("Incorrect node locks, owning %s, expected %s" %
9794
           (owned_nodes, self.node_secondary_ip.keys()))
9795

    
9796
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9797
      assert list(owned_instances) == [self.instance_name], \
9798
          "Instance '%s' not locked" % self.instance_name
9799

    
9800
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9801
          "Should not own any node group lock at this point"
9802

    
9803
    if not self.disks:
9804
      feedback_fn("No disks need replacement")
9805
      return
9806

    
9807
    feedback_fn("Replacing disk(s) %s for %s" %
9808
                (utils.CommaJoin(self.disks), self.instance.name))
9809

    
9810
    activate_disks = (not self.instance.admin_up)
9811

    
9812
    # Activate the instance disks if we're replacing them on a down instance
9813
    if activate_disks:
9814
      _StartInstanceDisks(self.lu, self.instance, True)
9815

    
9816
    try:
9817
      # Should we replace the secondary node?
9818
      if self.new_node is not None:
9819
        fn = self._ExecDrbd8Secondary
9820
      else:
9821
        fn = self._ExecDrbd8DiskOnly
9822

    
9823
      result = fn(feedback_fn)
9824
    finally:
9825
      # Deactivate the instance disks if we're replacing them on a
9826
      # down instance
9827
      if activate_disks:
9828
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9829

    
9830
    if __debug__:
9831
      # Verify owned locks
9832
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9833
      nodes = frozenset(self.node_secondary_ip)
9834
      assert ((self.early_release and not owned_nodes) or
9835
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9836
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9837
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9838

    
9839
    return result
9840

    
9841
  def _CheckVolumeGroup(self, nodes):
9842
    self.lu.LogInfo("Checking volume groups")
9843

    
9844
    vgname = self.cfg.GetVGName()
9845

    
9846
    # Make sure volume group exists on all involved nodes
9847
    results = self.rpc.call_vg_list(nodes)
9848
    if not results:
9849
      raise errors.OpExecError("Can't list volume groups on the nodes")
9850

    
9851
    for node in nodes:
9852
      res = results[node]
9853
      res.Raise("Error checking node %s" % node)
9854
      if vgname not in res.payload:
9855
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9856
                                 (vgname, node))
9857

    
9858
  def _CheckDisksExistence(self, nodes):
9859
    # Check disk existence
9860
    for idx, dev in enumerate(self.instance.disks):
9861
      if idx not in self.disks:
9862
        continue
9863

    
9864
      for node in nodes:
9865
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9866
        self.cfg.SetDiskID(dev, node)
9867

    
9868
        result = self.rpc.call_blockdev_find(node, dev)
9869

    
9870
        msg = result.fail_msg
9871
        if msg or not result.payload:
9872
          if not msg:
9873
            msg = "disk not found"
9874
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9875
                                   (idx, node, msg))
9876

    
9877
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9878
    for idx, dev in enumerate(self.instance.disks):
9879
      if idx not in self.disks:
9880
        continue
9881

    
9882
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9883
                      (idx, node_name))
9884

    
9885
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9886
                                   ldisk=ldisk):
9887
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9888
                                 " replace disks for instance %s" %
9889
                                 (node_name, self.instance.name))
9890

    
9891
  def _CreateNewStorage(self, node_name):
9892
    """Create new storage on the primary or secondary node.
9893

9894
    This is only used for same-node replaces, not for changing the
9895
    secondary node, hence we don't want to modify the existing disk.
9896

9897
    """
9898
    iv_names = {}
9899

    
9900
    for idx, dev in enumerate(self.instance.disks):
9901
      if idx not in self.disks:
9902
        continue
9903

    
9904
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9905

    
9906
      self.cfg.SetDiskID(dev, node_name)
9907

    
9908
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9909
      names = _GenerateUniqueNames(self.lu, lv_names)
9910

    
9911
      vg_data = dev.children[0].logical_id[0]
9912
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9913
                             logical_id=(vg_data, names[0]))
9914
      vg_meta = dev.children[1].logical_id[0]
9915
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
9916
                             logical_id=(vg_meta, names[1]))
9917

    
9918
      new_lvs = [lv_data, lv_meta]
9919
      old_lvs = [child.Copy() for child in dev.children]
9920
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9921

    
9922
      # we pass force_create=True to force the LVM creation
9923
      for new_lv in new_lvs:
9924
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9925
                        _GetInstanceInfoText(self.instance), False)
9926

    
9927
    return iv_names
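  # Added, descriptive note: the mapping returned above has the shape
  #
  #   iv_names = {
  #     "disk/0": (drbd_dev, [old_data_lv, old_meta_lv],
  #                [new_data_lv, new_meta_lv]),
  #     ...
  #   }
  #
  # keyed by the DRBD disk's iv_name, so the later steps can rename/attach
  # the new LVs and remove the old ones.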
9928

    
9929
  def _CheckDevices(self, node_name, iv_names):
9930
    for name, (dev, _, _) in iv_names.iteritems():
9931
      self.cfg.SetDiskID(dev, node_name)
9932

    
9933
      result = self.rpc.call_blockdev_find(node_name, dev)
9934

    
9935
      msg = result.fail_msg
9936
      if msg or not result.payload:
9937
        if not msg:
9938
          msg = "disk not found"
9939
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9940
                                 (name, msg))
9941

    
9942
      if result.payload.is_degraded:
9943
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9944

    
9945
  def _RemoveOldStorage(self, node_name, iv_names):
9946
    for name, (_, old_lvs, _) in iv_names.iteritems():
9947
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9948

    
9949
      for lv in old_lvs:
9950
        self.cfg.SetDiskID(lv, node_name)
9951

    
9952
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9953
        if msg:
9954
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9955
                             hint="remove unused LVs manually")
9956

    
9957
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9958
    """Replace a disk on the primary or secondary for DRBD 8.
9959

9960
    The algorithm for replace is quite complicated:
9961

9962
      1. for each disk to be replaced:
9963

9964
        1. create new LVs on the target node with unique names
9965
        1. detach old LVs from the drbd device
9966
        1. rename old LVs to name_replaced.<time_t>
9967
        1. rename new LVs to old LVs
9968
        1. attach the new LVs (with the old names now) to the drbd device
9969

9970
      1. wait for sync across all devices
9971

9972
      1. for each modified disk:
9973

9974
        1. remove old LVs (which have the name name_replaced.<time_t>)
9975

9976
    Failures are not very well handled.
9977

9978
    """
9979
    steps_total = 6
9980

    
9981
    # Step: check device activation
9982
    self.lu.LogStep(1, steps_total, "Check device existence")
9983
    self._CheckDisksExistence([self.other_node, self.target_node])
9984
    self._CheckVolumeGroup([self.target_node, self.other_node])
9985

    
9986
    # Step: check other node consistency
9987
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9988
    self._CheckDisksConsistency(self.other_node,
9989
                                self.other_node == self.instance.primary_node,
9990
                                False)
9991

    
9992
    # Step: create new storage
9993
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9994
    iv_names = self._CreateNewStorage(self.target_node)
9995

    
9996
    # Step: for each lv, detach+rename*2+attach
9997
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9998
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9999
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10000

    
10001
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10002
                                                     old_lvs)
10003
      result.Raise("Can't detach drbd from local storage on node"
10004
                   " %s for device %s" % (self.target_node, dev.iv_name))
10005
      #dev.children = []
10006
      #cfg.Update(instance)
10007

    
10008
      # ok, we created the new LVs, so now we know we have the needed
10009
      # storage; as such, we proceed on the target node to rename
10010
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10011
      # using the assumption that logical_id == physical_id (which in
10012
      # turn is the unique_id on that node)
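      # Added illustrative example (hypothetical LV names): with an old data
      # LV ("xenvg", "aaa.disk0_data") and a freshly created
      # ("xenvg", "bbb.disk0_data"), the code below first renames the old LV
      # to "aaa.disk0_data_replaced-<time_t>" and then renames the new LV to
      # "aaa.disk0_data", so the DRBD device keeps pointing at the same
      # (vg, name) pair while actually using the new storage.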
10013

    
10014
      # FIXME(iustin): use a better name for the replaced LVs
10015
      temp_suffix = int(time.time())
10016
      ren_fn = lambda d, suff: (d.physical_id[0],
10017
                                d.physical_id[1] + "_replaced-%s" % suff)
10018

    
10019
      # Build the rename list based on what LVs exist on the node
10020
      rename_old_to_new = []
10021
      for to_ren in old_lvs:
10022
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10023
        if not result.fail_msg and result.payload:
10024
          # device exists
10025
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10026

    
10027
      self.lu.LogInfo("Renaming the old LVs on the target node")
10028
      result = self.rpc.call_blockdev_rename(self.target_node,
10029
                                             rename_old_to_new)
10030
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
10031

    
10032
      # Now we rename the new LVs to the old LVs
10033
      self.lu.LogInfo("Renaming the new LVs on the target node")
10034
      rename_new_to_old = [(new, old.physical_id)
10035
                           for old, new in zip(old_lvs, new_lvs)]
10036
      result = self.rpc.call_blockdev_rename(self.target_node,
10037
                                             rename_new_to_old)
10038
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
10039

    
10040
      # Intermediate steps of in-memory modifications
10041
      for old, new in zip(old_lvs, new_lvs):
10042
        new.logical_id = old.logical_id
10043
        self.cfg.SetDiskID(new, self.target_node)
10044

    
10045
      # We need to modify old_lvs so that removal later removes the
10046
      # right LVs, not the newly added ones; note that old_lvs is a
10047
      # copy here
10048
      for disk in old_lvs:
10049
        disk.logical_id = ren_fn(disk, temp_suffix)
10050
        self.cfg.SetDiskID(disk, self.target_node)
10051

    
10052
      # Now that the new lvs have the old name, we can add them to the device
10053
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10054
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10055
                                                  new_lvs)
10056
      msg = result.fail_msg
10057
      if msg:
10058
        for new_lv in new_lvs:
10059
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
10060
                                               new_lv).fail_msg
10061
          if msg2:
10062
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10063
                               hint=("cleanup manually the unused logical"
10064
                                     "volumes"))
10065
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10066

    
10067
    cstep = 5
10068
    if self.early_release:
10069
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10070
      cstep += 1
10071
      self._RemoveOldStorage(self.target_node, iv_names)
10072
      # WARNING: we release both node locks here, do not do other RPCs
10073
      # than WaitForSync to the primary node
10074
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10075
                    names=[self.target_node, self.other_node])
10076

    
10077
    # Wait for sync
10078
    # This can fail as the old devices are degraded and _WaitForSync
10079
    # does a combined result over all disks, so we don't check its return value
10080
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10081
    cstep += 1
10082
    _WaitForSync(self.lu, self.instance)
10083

    
10084
    # Check all devices manually
10085
    self._CheckDevices(self.instance.primary_node, iv_names)
10086

    
10087
    # Step: remove old storage
10088
    if not self.early_release:
10089
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10090
      cstep += 1
10091
      self._RemoveOldStorage(self.target_node, iv_names)
10092

    
10093
  def _ExecDrbd8Secondary(self, feedback_fn):
10094
    """Replace the secondary node for DRBD 8.
10095

10096
    The algorithm for replace is quite complicated:
10097
      - for all disks of the instance:
10098
        - create new LVs on the new node with same names
10099
        - shutdown the drbd device on the old secondary
10100
        - disconnect the drbd network on the primary
10101
        - create the drbd device on the new secondary
10102
        - network attach the drbd on the primary, using an artifice:
10103
          the drbd code for Attach() will connect to the network if it
10104
          finds a device which is connected to the good local disks but
10105
          not network enabled
10106
      - wait for sync across all devices
10107
      - remove all disks from the old secondary
10108

10109
    Failures are not very well handled.
10110

10111
    """
10112
    steps_total = 6
10113

    
10114
    pnode = self.instance.primary_node
10115

    
10116
    # Step: check device activation
10117
    self.lu.LogStep(1, steps_total, "Check device existence")
10118
    self._CheckDisksExistence([self.instance.primary_node])
10119
    self._CheckVolumeGroup([self.instance.primary_node])
10120

    
10121
    # Step: check other node consistency
10122
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10123
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
10124

    
10125
    # Step: create new storage
10126
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10127
    for idx, dev in enumerate(self.instance.disks):
10128
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10129
                      (self.new_node, idx))
10130
      # we pass force_create=True to force LVM creation
10131
      for new_lv in dev.children:
10132
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10133
                        _GetInstanceInfoText(self.instance), False)
10134

    
10135
    # Step 4: drbd minors and drbd setup changes
10136
    # after this, we must manually remove the drbd minors on both the
10137
    # error and the success paths
10138
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10139
    minors = self.cfg.AllocateDRBDMinor([self.new_node
10140
                                         for dev in self.instance.disks],
10141
                                        self.instance.name)
10142
    logging.debug("Allocated minors %r", minors)
10143

    
10144
    iv_names = {}
10145
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10146
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10147
                      (self.new_node, idx))
10148
      # create new devices on new_node; note that we create two IDs:
10149
      # one without port, so the drbd will be activated without
10150
      # networking information on the new node at this stage, and one
10151
      # with network, for the later activation in step 4
10152
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10153
      if self.instance.primary_node == o_node1:
10154
        p_minor = o_minor1
10155
      else:
10156
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10157
        p_minor = o_minor2
10158

    
10159
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10160
                      p_minor, new_minor, o_secret)
10161
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10162
                    p_minor, new_minor, o_secret)
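      # Added illustrative example (made-up values): if the old logical_id
      # was ("node1", "node2", 11000, 0, 1, "secret") with "node1" as the
      # primary, then for new secondary "node3" and allocated minor 3:
      #   new_alone_id = ("node1", "node3", None, 0, 3, "secret")
      #   new_net_id   = ("node1", "node3", 11000, 0, 3, "secret")
      # i.e. same primary-side minor and secret, no port for the stand-alone
      # activation, and the original port for the networked one.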
10163

    
10164
      iv_names[idx] = (dev, dev.children, new_net_id)
10165
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10166
                    new_net_id)
10167
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10168
                              logical_id=new_alone_id,
10169
                              children=dev.children,
10170
                              size=dev.size)
10171
      try:
10172
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10173
                              _GetInstanceInfoText(self.instance), False)
10174
      except errors.GenericError:
10175
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10176
        raise
10177

    
10178
    # We have new devices, shutdown the drbd on the old secondary
10179
    for idx, dev in enumerate(self.instance.disks):
10180
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10181
      self.cfg.SetDiskID(dev, self.target_node)
10182
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10183
      if msg:
10184
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10185
                           "node: %s" % (idx, msg),
10186
                           hint=("Please cleanup this device manually as"
10187
                                 " soon as possible"))
10188

    
10189
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10190
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10191
                                               self.instance.disks)[pnode]
10192

    
10193
    msg = result.fail_msg
10194
    if msg:
10195
      # detaches didn't succeed (unlikely)
10196
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10197
      raise errors.OpExecError("Can't detach the disks from the network on"
10198
                               " old node: %s" % (msg,))
10199

    
10200
    # if we managed to detach at least one, we update all the disks of
10201
    # the instance to point to the new secondary
10202
    self.lu.LogInfo("Updating instance configuration")
10203
    for dev, _, new_logical_id in iv_names.itervalues():
10204
      dev.logical_id = new_logical_id
10205
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10206

    
10207
    self.cfg.Update(self.instance, feedback_fn)
10208

    
10209
    # and now perform the drbd attach
10210
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10211
                    " (standalone => connected)")
10212
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10213
                                            self.new_node],
10214
                                           self.node_secondary_ip,
10215
                                           self.instance.disks,
10216
                                           self.instance.name,
10217
                                           False)
10218
    for to_node, to_result in result.items():
10219
      msg = to_result.fail_msg
10220
      if msg:
10221
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10222
                           to_node, msg,
10223
                           hint=("please do a gnt-instance info to see the"
10224
                                 " status of disks"))
10225
    cstep = 5
10226
    if self.early_release:
10227
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10228
      cstep += 1
10229
      self._RemoveOldStorage(self.target_node, iv_names)
10230
      # WARNING: we release all node locks here, do not do other RPCs
10231
      # than WaitForSync to the primary node
10232
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10233
                    names=[self.instance.primary_node,
10234
                           self.target_node,
10235
                           self.new_node])
10236

    
10237
    # Wait for sync
10238
    # This can fail as the old devices are degraded and _WaitForSync
10239
    # does a combined result over all disks, so we don't check its return value
10240
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10241
    cstep += 1
10242
    _WaitForSync(self.lu, self.instance)
10243

    
10244
    # Check all devices manually
10245
    self._CheckDevices(self.instance.primary_node, iv_names)
10246

    
10247
    # Step: remove old storage
10248
    if not self.early_release:
10249
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10250
      self._RemoveOldStorage(self.target_node, iv_names)
10251

    
10252

    
10253
class LURepairNodeStorage(NoHooksLU):
10254
  """Repairs the volume group on a node.
10255

10256
  """
10257
  REQ_BGL = False
10258

    
10259
  def CheckArguments(self):
10260
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10261

    
10262
    storage_type = self.op.storage_type
10263

    
10264
    if (constants.SO_FIX_CONSISTENCY not in
10265
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10266
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10267
                                 " repaired" % storage_type,
10268
                                 errors.ECODE_INVAL)
10269

    
10270
  def ExpandNames(self):
10271
    self.needed_locks = {
10272
      locking.LEVEL_NODE: [self.op.node_name],
10273
      }
10274

    
10275
  def _CheckFaultyDisks(self, instance, node_name):
10276
    """Ensure faulty disks abort the opcode or at least warn."""
10277
    try:
10278
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10279
                                  node_name, True):
10280
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10281
                                   " node '%s'" % (instance.name, node_name),
10282
                                   errors.ECODE_STATE)
10283
    except errors.OpPrereqError, err:
10284
      if self.op.ignore_consistency:
10285
        self.proc.LogWarning(str(err.args[0]))
10286
      else:
10287
        raise
10288

    
10289
  def CheckPrereq(self):
10290
    """Check prerequisites.
10291

10292
    """
10293
    # Check whether any instance on this node has faulty disks
10294
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10295
      if not inst.admin_up:
10296
        continue
10297
      check_nodes = set(inst.all_nodes)
10298
      check_nodes.discard(self.op.node_name)
10299
      for inst_node_name in check_nodes:
10300
        self._CheckFaultyDisks(inst, inst_node_name)
10301

    
10302
  def Exec(self, feedback_fn):
10303
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10304
                (self.op.name, self.op.node_name))
10305

    
10306
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10307
    result = self.rpc.call_storage_execute(self.op.node_name,
10308
                                           self.op.storage_type, st_args,
10309
                                           self.op.name,
10310
                                           constants.SO_FIX_CONSISTENCY)
10311
    result.Raise("Failed to repair storage unit '%s' on %s" %
10312
                 (self.op.name, self.op.node_name))
10313

    
10314

    
10315
class LUNodeEvacuate(NoHooksLU):
10316
  """Evacuates instances off a list of nodes.
10317

10318
  """
10319
  REQ_BGL = False
10320

    
10321
  def CheckArguments(self):
10322
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10323

    
10324
  def ExpandNames(self):
10325
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10326

    
10327
    if self.op.remote_node is not None:
10328
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10329
      assert self.op.remote_node
10330

    
10331
      if self.op.remote_node == self.op.node_name:
10332
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10333
                                   " secondary node", errors.ECODE_INVAL)
10334

    
10335
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10336
        raise errors.OpPrereqError("Without the use of an iallocator only"
10337
                                   " secondary instances can be evacuated",
10338
                                   errors.ECODE_INVAL)
10339

    
10340
    # Declare locks
10341
    self.share_locks = _ShareAll()
10342
    self.needed_locks = {
10343
      locking.LEVEL_INSTANCE: [],
10344
      locking.LEVEL_NODEGROUP: [],
10345
      locking.LEVEL_NODE: [],
10346
      }
10347

    
10348
    if self.op.remote_node is None:
10349
      # Iallocator will choose any node(s) in the same group
10350
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10351
    else:
10352
      group_nodes = frozenset([self.op.remote_node])
10353

    
10354
    # Determine nodes to be locked
10355
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10356

    
10357
  def _DetermineInstances(self):
10358
    """Builds list of instances to operate on.
10359

10360
    """
10361
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10362

    
10363
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10364
      # Primary instances only
10365
      inst_fn = _GetNodePrimaryInstances
10366
      assert self.op.remote_node is None, \
10367
        "Evacuating primary instances requires iallocator"
10368
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10369
      # Secondary instances only
10370
      inst_fn = _GetNodeSecondaryInstances
10371
    else:
10372
      # All instances
10373
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10374
      inst_fn = _GetNodeInstances
10375

    
10376
    return inst_fn(self.cfg, self.op.node_name)
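  # Added summary comment: the evacuation mode decides which instances are
  # returned above -
  #   NEVAC_PRI: instances having the node as primary (iallocator required)
  #   NEVAC_SEC: instances using the node as DRBD secondary
  #   NEVAC_ALL: both of the above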
10377

    
10378
  def DeclareLocks(self, level):
10379
    if level == locking.LEVEL_INSTANCE:
10380
      # Lock instances optimistically, needs verification once node and group
10381
      # locks have been acquired
10382
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10383
        set(i.name for i in self._DetermineInstances())
10384

    
10385
    elif level == locking.LEVEL_NODEGROUP:
10386
      # Lock node groups optimistically, needs verification once nodes have
10387
      # been acquired
10388
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10389
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10390

    
10391
    elif level == locking.LEVEL_NODE:
10392
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10393

    
10394
  def CheckPrereq(self):
10395
    # Verify locks
10396
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10397
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10398
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10399

    
10400
    assert owned_nodes == self.lock_nodes
10401

    
10402
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10403
    if owned_groups != wanted_groups:
10404
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10405
                               " current groups are '%s', used to be '%s'" %
10406
                               (utils.CommaJoin(wanted_groups),
10407
                                utils.CommaJoin(owned_groups)))
10408

    
10409
    # Determine affected instances
10410
    self.instances = self._DetermineInstances()
10411
    self.instance_names = [i.name for i in self.instances]
10412

    
10413
    if set(self.instance_names) != owned_instances:
10414
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10415
                               " were acquired, current instances are '%s',"
10416
                               " used to be '%s'" %
10417
                               (self.op.node_name,
10418
                                utils.CommaJoin(self.instance_names),
10419
                                utils.CommaJoin(owned_instances)))
10420

    
10421
    if self.instance_names:
10422
      self.LogInfo("Evacuating instances from node '%s': %s",
10423
                   self.op.node_name,
10424
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10425
    else:
10426
      self.LogInfo("No instances to evacuate from node '%s'",
10427
                   self.op.node_name)
10428

    
10429
    if self.op.remote_node is not None:
10430
      for i in self.instances:
10431
        if i.primary_node == self.op.remote_node:
10432
          raise errors.OpPrereqError("Node %s is the primary node of"
10433
                                     " instance %s, cannot use it as"
10434
                                     " secondary" %
10435
                                     (self.op.remote_node, i.name),
10436
                                     errors.ECODE_INVAL)
10437

    
10438
  def Exec(self, feedback_fn):
10439
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10440

    
10441
    if not self.instance_names:
10442
      # No instances to evacuate
10443
      jobs = []
10444

    
10445
    elif self.op.iallocator is not None:
10446
      # TODO: Implement relocation to other group
10447
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10448
                       evac_mode=self.op.mode,
10449
                       instances=list(self.instance_names))
10450

    
10451
      ial.Run(self.op.iallocator)
10452

    
10453
      if not ial.success:
10454
        raise errors.OpPrereqError("Can't compute node evacuation using"
10455
                                   " iallocator '%s': %s" %
10456
                                   (self.op.iallocator, ial.info),
10457
                                   errors.ECODE_NORES)
10458

    
10459
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10460

    
10461
    elif self.op.remote_node is not None:
10462
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10463
      jobs = [
10464
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10465
                                        remote_node=self.op.remote_node,
10466
                                        disks=[],
10467
                                        mode=constants.REPLACE_DISK_CHG,
10468
                                        early_release=self.op.early_release)]
10469
        for instance_name in self.instance_names
10470
        ]
10471

    
10472
    else:
10473
      raise errors.ProgrammerError("No iallocator or remote node")
10474

    
10475
    return ResultWithJobs(jobs)
10476

    
10477

    
10478
def _SetOpEarlyRelease(early_release, op):
10479
  """Sets C{early_release} flag on opcodes if available.
10480

10481
  """
10482
  try:
10483
    op.early_release = early_release
10484
  except AttributeError:
10485
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10486

    
10487
  return op
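# Added usage sketch (not original code): this helper is meant to be curried
# and mapped over freshly deserialized opcodes, e.g.
#
#   ops = map(opcodes.OpCode.LoadOpCode, serialized_ops)
#   ops = map(compat.partial(_SetOpEarlyRelease, True), ops)
#
# Opcodes without an early_release slot are returned unchanged, since the
# AttributeError is swallowed above.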
10488

    
10489

    
10490
def _NodeEvacDest(use_nodes, group, nodes):
10491
  """Returns group or nodes depending on caller's choice.
10492

10493
  """
10494
  if use_nodes:
10495
    return utils.CommaJoin(nodes)
10496
  else:
10497
    return group
10498

    
10499

    
10500
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10501
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10502

10503
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10504
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10505

10506
  @type lu: L{LogicalUnit}
10507
  @param lu: Logical unit instance
10508
  @type alloc_result: tuple/list
10509
  @param alloc_result: Result from iallocator
10510
  @type early_release: bool
10511
  @param early_release: Whether to release locks early if possible
10512
  @type use_nodes: bool
10513
  @param use_nodes: Whether to display node names instead of groups
10514

10515
  """
10516
  (moved, failed, jobs) = alloc_result
10517

    
10518
  if failed:
10519
    lu.LogWarning("Unable to evacuate instances %s",
10520
                  utils.CommaJoin("%s (%s)" % (name, reason)
10521
                                  for (name, reason) in failed))
10522

    
10523
  if moved:
10524
    lu.LogInfo("Instances to be moved: %s",
10525
               utils.CommaJoin("%s (to %s)" %
10526
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10527
                               for (name, group, nodes) in moved))
10528

    
10529
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10530
              map(opcodes.OpCode.LoadOpCode, ops))
10531
          for ops in jobs]
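# Added, illustrative shape of alloc_result (example values only):
#
#   moved  = [("inst1.example.com", "group1", ["node2.example.com"])]
#   failed = [("inst9.example.com", "disk template not mirrored")]
#   jobs   = [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}], ...]
#
# Each inner list of jobs becomes one submitted job; its serialized opcodes
# are re-instantiated via opcodes.OpCode.LoadOpCode and, where supported,
# get the caller's early_release flag set.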
10532

    
10533

    
10534
class LUInstanceGrowDisk(LogicalUnit):
10535
  """Grow a disk of an instance.
10536

10537
  """
10538
  HPATH = "disk-grow"
10539
  HTYPE = constants.HTYPE_INSTANCE
10540
  REQ_BGL = False
10541

    
10542
  def ExpandNames(self):
10543
    self._ExpandAndLockInstance()
10544
    self.needed_locks[locking.LEVEL_NODE] = []
10545
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10546

    
10547
  def DeclareLocks(self, level):
10548
    if level == locking.LEVEL_NODE:
10549
      self._LockInstancesNodes()
10550

    
10551
  def BuildHooksEnv(self):
10552
    """Build hooks env.
10553

10554
    This runs on the master, the primary and all the secondaries.
10555

10556
    """
10557
    env = {
10558
      "DISK": self.op.disk,
10559
      "AMOUNT": self.op.amount,
10560
      }
10561
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10562
    return env
10563

    
10564
  def BuildHooksNodes(self):
10565
    """Build hooks nodes.
10566

10567
    """
10568
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10569
    return (nl, nl)
10570

    
10571
  def CheckPrereq(self):
10572
    """Check prerequisites.
10573

10574
    This checks that the instance is in the cluster.
10575

10576
    """
10577
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10578
    assert instance is not None, \
10579
      "Cannot retrieve locked instance %s" % self.op.instance_name
10580
    nodenames = list(instance.all_nodes)
10581
    for node in nodenames:
10582
      _CheckNodeOnline(self, node)
10583

    
10584
    self.instance = instance
10585

    
10586
    if instance.disk_template not in constants.DTS_GROWABLE:
10587
      raise errors.OpPrereqError("Instance's disk layout does not support"
10588
                                 " growing", errors.ECODE_INVAL)
10589

    
10590
    self.disk = instance.FindDisk(self.op.disk)
10591

    
10592
    if instance.disk_template not in (constants.DT_FILE,
10593
                                      constants.DT_SHARED_FILE):
10594
      # TODO: check the free disk space for file, when that feature will be
10595
      # supported
10596
      _CheckNodesFreeDiskPerVG(self, nodenames,
10597
                               self.disk.ComputeGrowth(self.op.amount))
10598

    
10599
  def Exec(self, feedback_fn):
10600
    """Execute disk grow.
10601

10602
    """
10603
    instance = self.instance
10604
    disk = self.disk
10605

    
10606
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10607
    if not disks_ok:
10608
      raise errors.OpExecError("Cannot activate block device to grow")
10609

    
10610
    # First run all grow ops in dry-run mode
10611
    for node in instance.all_nodes:
10612
      self.cfg.SetDiskID(disk, node)
10613
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10614
      result.Raise("Grow request failed to node %s" % node)
10615

    
10616
    # We know that (as far as we can test) operations across different
10617
    # nodes will succeed, time to run it for real
10618
    for node in instance.all_nodes:
10619
      self.cfg.SetDiskID(disk, node)
10620
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10621
      result.Raise("Grow request failed to node %s" % node)
10622

    
10623
      # TODO: Rewrite code to work properly
10624
      # DRBD goes into sync mode for a short amount of time after executing the
10625
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10626
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10627
      # time is a work-around.
10628
      time.sleep(5)
10629

    
10630
    disk.RecordGrow(self.op.amount)
10631
    self.cfg.Update(instance, feedback_fn)
10632
    if self.op.wait_for_sync:
10633
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10634
      if disk_abort:
10635
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10636
                             " status; please check the instance")
10637
      if not instance.admin_up:
10638
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10639
    elif not instance.admin_up:
10640
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10641
                           " not supposed to be running because no wait for"
10642
                           " sync mode was requested")
10643

    
10644

    
10645
class LUInstanceQueryData(NoHooksLU):
10646
  """Query runtime instance data.
10647

10648
  """
10649
  REQ_BGL = False
10650

    
10651
  def ExpandNames(self):
10652
    self.needed_locks = {}
10653

    
10654
    # Use locking if requested or when non-static information is wanted
10655
    if not (self.op.static or self.op.use_locking):
10656
      self.LogWarning("Non-static data requested, locks need to be acquired")
10657
      self.op.use_locking = True
10658

    
10659
    if self.op.instances or not self.op.use_locking:
10660
      # Expand instance names right here
10661
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10662
    else:
10663
      # Will use acquired locks
10664
      self.wanted_names = None
10665

    
10666
    if self.op.use_locking:
10667
      self.share_locks = _ShareAll()
10668

    
10669
      if self.wanted_names is None:
10670
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10671
      else:
10672
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10673

    
10674
      self.needed_locks[locking.LEVEL_NODE] = []
10675
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10676

    
10677
  def DeclareLocks(self, level):
10678
    if self.op.use_locking and level == locking.LEVEL_NODE:
10679
      self._LockInstancesNodes()
10680

    
10681
  def CheckPrereq(self):
10682
    """Check prerequisites.
10683

10684
    This only checks the optional instance list against the existing names.
10685

10686
    """
10687
    if self.wanted_names is None:
10688
      assert self.op.use_locking, "Locking was not used"
10689
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10690

    
10691
    self.wanted_instances = \
10692
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10693

    
10694
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10695
    """Returns the status of a block device
10696

10697
    """
10698
    if self.op.static or not node:
10699
      return None
10700

    
10701
    self.cfg.SetDiskID(dev, node)
10702

    
10703
    result = self.rpc.call_blockdev_find(node, dev)
10704
    if result.offline:
10705
      return None
10706

    
10707
    result.Raise("Can't compute disk status for %s" % instance_name)
10708

    
10709
    status = result.payload
10710
    if status is None:
10711
      return None
10712

    
10713
    return (status.dev_path, status.major, status.minor,
10714
            status.sync_percent, status.estimated_time,
10715
            status.is_degraded, status.ldisk_status)
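  # Added note: the tuple above is interpreted by callers as
  #   (dev_path, major, minor, sync_percent, estimated_time,
  #    is_degraded, ldisk_status)
  # and is None when only static data was requested, the node is offline,
  # or no status payload was returned.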
10716

    
10717
  def _ComputeDiskStatus(self, instance, snode, dev):
10718
    """Compute block device status.
10719

10720
    """
10721
    if dev.dev_type in constants.LDS_DRBD:
10722
      # we change the snode then (otherwise we use the one passed in)
10723
      if dev.logical_id[0] == instance.primary_node:
10724
        snode = dev.logical_id[1]
10725
      else:
10726
        snode = dev.logical_id[0]
10727

    
10728
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10729
                                              instance.name, dev)
10730
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10731

    
10732
    if dev.children:
10733
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10734
                                        instance, snode),
10735
                         dev.children)
10736
    else:
10737
      dev_children = []
10738

    
10739
    return {
10740
      "iv_name": dev.iv_name,
10741
      "dev_type": dev.dev_type,
10742
      "logical_id": dev.logical_id,
10743
      "physical_id": dev.physical_id,
10744
      "pstatus": dev_pstatus,
10745
      "sstatus": dev_sstatus,
10746
      "children": dev_children,
10747
      "mode": dev.mode,
10748
      "size": dev.size,
10749
      }
10750

    
10751
  def Exec(self, feedback_fn):
10752
    """Gather and return data"""
10753
    result = {}
10754

    
10755
    cluster = self.cfg.GetClusterInfo()
10756

    
10757
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10758
                                          for i in self.wanted_instances)
10759
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10760
      if self.op.static or pnode.offline:
10761
        remote_state = None
10762
        if pnode.offline:
10763
          self.LogWarning("Primary node %s is marked offline, returning static"
10764
                          " information only for instance %s" %
10765
                          (pnode.name, instance.name))
10766
      else:
10767
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10768
                                                  instance.name,
10769
                                                  instance.hypervisor)
10770
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10771
        remote_info = remote_info.payload
10772
        if remote_info and "state" in remote_info:
10773
          remote_state = "up"
10774
        else:
10775
          remote_state = "down"
10776

    
10777
      if instance.admin_up:
10778
        config_state = "up"
10779
      else:
10780
        config_state = "down"
10781

    
10782
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10783
                  instance.disks)
10784

    
10785
      result[instance.name] = {
10786
        "name": instance.name,
10787
        "config_state": config_state,
10788
        "run_state": remote_state,
10789
        "pnode": instance.primary_node,
10790
        "snodes": instance.secondary_nodes,
10791
        "os": instance.os,
10792
        # this happens to be the same format used for hooks
10793
        "nics": _NICListToTuple(self, instance.nics),
10794
        "disk_template": instance.disk_template,
10795
        "disks": disks,
10796
        "hypervisor": instance.hypervisor,
10797
        "network_port": instance.network_port,
10798
        "hv_instance": instance.hvparams,
10799
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10800
        "be_instance": instance.beparams,
10801
        "be_actual": cluster.FillBE(instance),
10802
        "os_instance": instance.osparams,
10803
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10804
        "serial_no": instance.serial_no,
10805
        "mtime": instance.mtime,
10806
        "ctime": instance.ctime,
10807
        "uuid": instance.uuid,
10808
        }
10809

    
10810
    return result
10811

    
10812

    
10813
class LUInstanceSetParams(LogicalUnit):
10814
  """Modifies an instances's parameters.
10815

10816
  """
10817
  HPATH = "instance-modify"
10818
  HTYPE = constants.HTYPE_INSTANCE
10819
  REQ_BGL = False
10820

    
10821
  def CheckArguments(self):
10822
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10823
            self.op.hvparams or self.op.beparams or self.op.os_name):
10824
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10825

    
10826
    if self.op.hvparams:
10827
      _CheckGlobalHvParams(self.op.hvparams)
10828

    
10829
    # Disk validation
10830
    disk_addremove = 0
10831
    for disk_op, disk_dict in self.op.disks:
10832
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10833
      if disk_op == constants.DDM_REMOVE:
10834
        disk_addremove += 1
10835
        continue
10836
      elif disk_op == constants.DDM_ADD:
10837
        disk_addremove += 1
10838
      else:
10839
        if not isinstance(disk_op, int):
10840
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10841
        if not isinstance(disk_dict, dict):
10842
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10843
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10844

    
10845
      if disk_op == constants.DDM_ADD:
10846
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10847
        if mode not in constants.DISK_ACCESS_SET:
10848
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10849
                                     errors.ECODE_INVAL)
10850
        size = disk_dict.get(constants.IDISK_SIZE, None)
10851
        if size is None:
10852
          raise errors.OpPrereqError("Required disk parameter size missing",
10853
                                     errors.ECODE_INVAL)
10854
        try:
10855
          size = int(size)
10856
        except (TypeError, ValueError), err:
10857
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10858
                                     str(err), errors.ECODE_INVAL)
10859
        disk_dict[constants.IDISK_SIZE] = size
10860
      else:
10861
        # modification of disk
10862
        if constants.IDISK_SIZE in disk_dict:
10863
          raise errors.OpPrereqError("Disk size change not possible, use"
10864
                                     " grow-disk", errors.ECODE_INVAL)
10865

    
10866
    if disk_addremove > 1:
10867
      raise errors.OpPrereqError("Only one disk add or remove operation"
10868
                                 " supported at a time", errors.ECODE_INVAL)
10869

    
10870
    if self.op.disks and self.op.disk_template is not None:
10871
      raise errors.OpPrereqError("Disk template conversion and other disk"
10872
                                 " changes not supported at the same time",
10873
                                 errors.ECODE_INVAL)
10874

    
10875
    if (self.op.disk_template and
10876
        self.op.disk_template in constants.DTS_INT_MIRROR and
10877
        self.op.remote_node is None):
10878
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10879
                                 " one requires specifying a secondary node",
10880
                                 errors.ECODE_INVAL)
10881

    
10882
    # NIC validation
10883
    nic_addremove = 0
10884
    for nic_op, nic_dict in self.op.nics:
10885
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10886
      if nic_op == constants.DDM_REMOVE:
10887
        nic_addremove += 1
10888
        continue
10889
      elif nic_op == constants.DDM_ADD:
10890
        nic_addremove += 1
10891
      else:
10892
        if not isinstance(nic_op, int):
10893
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10894
        if not isinstance(nic_dict, dict):
10895
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10896
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10897

    
10898
      # nic_dict should be a dict
10899
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10900
      if nic_ip is not None:
10901
        if nic_ip.lower() == constants.VALUE_NONE:
10902
          nic_dict[constants.INIC_IP] = None
10903
        else:
10904
          if not netutils.IPAddress.IsValid(nic_ip):
10905
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10906
                                       errors.ECODE_INVAL)
10907

    
10908
      nic_bridge = nic_dict.get("bridge", None)
10909
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10910
      if nic_bridge and nic_link:
10911
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10912
                                   " at the same time", errors.ECODE_INVAL)
10913
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10914
        nic_dict["bridge"] = None
10915
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10916
        nic_dict[constants.INIC_LINK] = None
10917

    
10918
      if nic_op == constants.DDM_ADD:
10919
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10920
        if nic_mac is None:
10921
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10922

    
10923
      if constants.INIC_MAC in nic_dict:
10924
        nic_mac = nic_dict[constants.INIC_MAC]
10925
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10926
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10927

    
10928
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10929
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10930
                                     " modifying an existing nic",
10931
                                     errors.ECODE_INVAL)
10932

    
10933
    if nic_addremove > 1:
10934
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10935
                                 " supported at a time", errors.ECODE_INVAL)
10936

    
10937
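  # Illustrative sketch, not part of the original source: CheckArguments
  # above expects self.op.disks and self.op.nics to be lists of
  # (operation, parameters) pairs, where the operation is constants.DDM_ADD,
  # constants.DDM_REMOVE or the integer index of an existing device.
  # Hypothetical values that would pass the checks above:
  #
  #   disks = [(constants.DDM_ADD, {constants.IDISK_SIZE: 1024,
  #                                 constants.IDISK_MODE: constants.DISK_RDWR})]
  #   nics = [(0, {constants.INIC_IP: "192.0.2.10"}),
  #           (constants.DDM_REMOVE, {})]
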
  def ExpandNames(self):
10938
    self._ExpandAndLockInstance()
10939
    self.needed_locks[locking.LEVEL_NODE] = []
10940
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10941

    
10942
  def DeclareLocks(self, level):
10943
    if level == locking.LEVEL_NODE:
10944
      self._LockInstancesNodes()
10945
      if self.op.disk_template and self.op.remote_node:
10946
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10947
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10948

    
10949
  def BuildHooksEnv(self):
10950
    """Build hooks env.
10951

10952
    This runs on the master, primary and secondaries.
10953

10954
    """
10955
    args = dict()
10956
    if constants.BE_MEMORY in self.be_new:
10957
      args["memory"] = self.be_new[constants.BE_MEMORY]
10958
    if constants.BE_VCPUS in self.be_new:
10959
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10960
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10961
    # information at all.
10962
    if self.op.nics:
10963
      args["nics"] = []
10964
      nic_override = dict(self.op.nics)
10965
      for idx, nic in enumerate(self.instance.nics):
10966
        if idx in nic_override:
10967
          this_nic_override = nic_override[idx]
10968
        else:
10969
          this_nic_override = {}
10970
        if constants.INIC_IP in this_nic_override:
10971
          ip = this_nic_override[constants.INIC_IP]
10972
        else:
10973
          ip = nic.ip
10974
        if constants.INIC_MAC in this_nic_override:
10975
          mac = this_nic_override[constants.INIC_MAC]
10976
        else:
10977
          mac = nic.mac
10978
        if idx in self.nic_pnew:
10979
          nicparams = self.nic_pnew[idx]
10980
        else:
10981
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10982
        mode = nicparams[constants.NIC_MODE]
10983
        link = nicparams[constants.NIC_LINK]
10984
        args["nics"].append((ip, mac, mode, link))
10985
      if constants.DDM_ADD in nic_override:
10986
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10987
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10988
        nicparams = self.nic_pnew[constants.DDM_ADD]
10989
        mode = nicparams[constants.NIC_MODE]
10990
        link = nicparams[constants.NIC_LINK]
10991
        args["nics"].append((ip, mac, mode, link))
10992
      elif constants.DDM_REMOVE in nic_override:
10993
        del args["nics"][-1]
10994

    
10995
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10996
    if self.op.disk_template:
10997
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10998

    
10999
    return env
11000

    
11001
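  # Illustrative note, not part of the original source: the "nics" entry
  # built above is a list of (ip, mac, mode, link) tuples, one per NIC after
  # the requested changes, e.g. (hypothetical values):
  #
  #   [("192.0.2.10", "aa:00:00:12:34:56", constants.NIC_MODE_BRIDGED,
  #     "xen-br0")]
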
  def BuildHooksNodes(self):
11002
    """Build hooks nodes.
11003

11004
    """
11005
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11006
    return (nl, nl)
11007

    
11008
  def CheckPrereq(self):
11009
    """Check prerequisites.
11010

11011
    This checks the requested changes against the instance's current state.
11012

11013
    """
11014
    # checking the new params on the primary/secondary nodes
11015

    
11016
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11017
    cluster = self.cluster = self.cfg.GetClusterInfo()
11018
    assert self.instance is not None, \
11019
      "Cannot retrieve locked instance %s" % self.op.instance_name
11020
    pnode = instance.primary_node
11021
    nodelist = list(instance.all_nodes)
11022

    
11023
    # OS change
11024
    if self.op.os_name and not self.op.force:
11025
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11026
                      self.op.force_variant)
11027
      instance_os = self.op.os_name
11028
    else:
11029
      instance_os = instance.os
11030

    
11031
    if self.op.disk_template:
11032
      if instance.disk_template == self.op.disk_template:
11033
        raise errors.OpPrereqError("Instance already has disk template %s" %
11034
                                   instance.disk_template, errors.ECODE_INVAL)
11035

    
11036
      if (instance.disk_template,
11037
          self.op.disk_template) not in self._DISK_CONVERSIONS:
11038
        raise errors.OpPrereqError("Unsupported disk template conversion from"
11039
                                   " %s to %s" % (instance.disk_template,
11040
                                                  self.op.disk_template),
11041
                                   errors.ECODE_INVAL)
11042
      _CheckInstanceDown(self, instance, "cannot change disk template")
11043
      if self.op.disk_template in constants.DTS_INT_MIRROR:
11044
        if self.op.remote_node == pnode:
11045
          raise errors.OpPrereqError("Given new secondary node %s is the same"
11046
                                     " as the primary node of the instance" %
11047
                                     self.op.remote_node, errors.ECODE_STATE)
11048
        _CheckNodeOnline(self, self.op.remote_node)
11049
        _CheckNodeNotDrained(self, self.op.remote_node)
11050
        # FIXME: here we assume that the old instance type is DT_PLAIN
11051
        assert instance.disk_template == constants.DT_PLAIN
11052
        disks = [{constants.IDISK_SIZE: d.size,
11053
                  constants.IDISK_VG: d.logical_id[0]}
11054
                 for d in instance.disks]
11055
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11056
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11057

    
11058
    # hvparams processing
11059
    if self.op.hvparams:
11060
      hv_type = instance.hypervisor
11061
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11062
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11063
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11064

    
11065
      # local check
11066
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11067
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11068
      self.hv_proposed = self.hv_new = hv_new # the new actual values
11069
      self.hv_inst = i_hvdict # the new dict (without defaults)
11070
    else:
11071
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11072
                                              instance.hvparams)
11073
      self.hv_new = self.hv_inst = {}
11074

    
11075
    # beparams processing
11076
    if self.op.beparams:
11077
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11078
                                   use_none=True)
11079
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11080
      be_new = cluster.SimpleFillBE(i_bedict)
11081
      self.be_proposed = self.be_new = be_new # the new actual values
11082
      self.be_inst = i_bedict # the new dict (without defaults)
11083
    else:
11084
      self.be_new = self.be_inst = {}
11085
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11086
    be_old = cluster.FillBE(instance)
11087

    
11088
    # CPU param validation -- checking every time a parameter is
11089
    # changed to cover all cases where either CPU mask or vcpus have
11090
    # changed
11091
    if (constants.BE_VCPUS in self.be_proposed and
11092
        constants.HV_CPU_MASK in self.hv_proposed):
11093
      cpu_list = \
11094
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11095
      # Verify mask is consistent with number of vCPUs. Can skip this
11096
      # test if only 1 entry in the CPU mask, which means same mask
11097
      # is applied to all vCPUs.
11098
      if (len(cpu_list) > 1 and
11099
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11100
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11101
                                   " CPU mask [%s]" %
11102
                                   (self.be_proposed[constants.BE_VCPUS],
11103
                                    self.hv_proposed[constants.HV_CPU_MASK]),
11104
                                   errors.ECODE_INVAL)
11105

    
11106
      # Only perform this test if a new CPU mask is given
11107
      if constants.HV_CPU_MASK in self.hv_new:
11108
        # Calculate the largest CPU number requested
11109
        max_requested_cpu = max(map(max, cpu_list))
11110
        # Check that all of the instance's nodes have enough physical CPUs to
11111
        # satisfy the requested CPU mask
11112
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11113
                                max_requested_cpu + 1, instance.hypervisor)
11114

    
11115
    # osparams processing
11116
    if self.op.osparams:
11117
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11118
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11119
      self.os_inst = i_osdict # the new dict (without defaults)
11120
    else:
11121
      self.os_inst = {}
11122

    
11123
    self.warn = []
11124

    
11125
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11126
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11127
      mem_check_list = [pnode]
11128
      if be_new[constants.BE_AUTO_BALANCE]:
11129
        # either we changed auto_balance to yes or it was from before
11130
        mem_check_list.extend(instance.secondary_nodes)
11131
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11132
                                                  instance.hypervisor)
11133
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11134
                                         instance.hypervisor)
11135
      pninfo = nodeinfo[pnode]
11136
      msg = pninfo.fail_msg
11137
      if msg:
11138
        # Assume the primary node is unreachable and go ahead
11139
        self.warn.append("Can't get info from primary node %s: %s" %
11140
                         (pnode, msg))
11141
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
11142
        self.warn.append("Node data from primary node %s doesn't contain"
11143
                         " free memory information" % pnode)
11144
      elif instance_info.fail_msg:
11145
        self.warn.append("Can't get instance runtime information: %s" %
11146
                        instance_info.fail_msg)
11147
      else:
11148
        if instance_info.payload:
11149
          current_mem = int(instance_info.payload["memory"])
11150
        else:
11151
          # Assume instance not running
11152
          # (there is a slight race condition here, but it's not very probable,
11153
          # and we have no other way to check)
11154
          current_mem = 0
11155
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11156
                    pninfo.payload["memory_free"])
11157
        if miss_mem > 0:
11158
          raise errors.OpPrereqError("This change will prevent the instance"
11159
                                     " from starting, due to %d MB of memory"
11160
                                     " missing on its primary node" % miss_mem,
11161
                                     errors.ECODE_NORES)
11162

    
11163
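      # Worked example with hypothetical numbers (not part of the original
      # source): requesting 4096 MB for an instance currently using 1024 MB,
      # with the primary node reporting 2048 MB free, gives
      # miss_mem = 4096 - 1024 - 2048 = 1024 MB > 0, so the change is
      # rejected with ECODE_NORES.
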
      if be_new[constants.BE_AUTO_BALANCE]:
11164
        for node, nres in nodeinfo.items():
11165
          if node not in instance.secondary_nodes:
11166
            continue
11167
          nres.Raise("Can't get info from secondary node %s" % node,
11168
                     prereq=True, ecode=errors.ECODE_STATE)
11169
          if not isinstance(nres.payload.get("memory_free", None), int):
11170
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11171
                                       " memory information" % node,
11172
                                       errors.ECODE_STATE)
11173
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11174
            raise errors.OpPrereqError("This change will prevent the instance"
11175
                                       " from failover to its secondary node"
11176
                                       " %s, due to not enough memory" % node,
11177
                                       errors.ECODE_STATE)
11178

    
11179
    # NIC processing
11180
    self.nic_pnew = {}
11181
    self.nic_pinst = {}
11182
    for nic_op, nic_dict in self.op.nics:
11183
      if nic_op == constants.DDM_REMOVE:
11184
        if not instance.nics:
11185
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11186
                                     errors.ECODE_INVAL)
11187
        continue
11188
      if nic_op != constants.DDM_ADD:
11189
        # an existing nic
11190
        if not instance.nics:
11191
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11192
                                     " no NICs" % nic_op,
11193
                                     errors.ECODE_INVAL)
11194
        if nic_op < 0 or nic_op >= len(instance.nics):
11195
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11196
                                     " are 0 to %d" %
11197
                                     (nic_op, len(instance.nics) - 1),
11198
                                     errors.ECODE_INVAL)
11199
        old_nic_params = instance.nics[nic_op].nicparams
11200
        old_nic_ip = instance.nics[nic_op].ip
11201
      else:
11202
        old_nic_params = {}
11203
        old_nic_ip = None
11204

    
11205
      update_params_dict = dict([(key, nic_dict[key])
11206
                                 for key in constants.NICS_PARAMETERS
11207
                                 if key in nic_dict])
11208

    
11209
      if "bridge" in nic_dict:
11210
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11211

    
11212
      new_nic_params = _GetUpdatedParams(old_nic_params,
11213
                                         update_params_dict)
11214
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11215
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11216
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11217
      self.nic_pinst[nic_op] = new_nic_params
11218
      self.nic_pnew[nic_op] = new_filled_nic_params
11219
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11220

    
11221
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11222
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11223
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11224
        if msg:
11225
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11226
          if self.op.force:
11227
            self.warn.append(msg)
11228
          else:
11229
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11230
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11231
        if constants.INIC_IP in nic_dict:
11232
          nic_ip = nic_dict[constants.INIC_IP]
11233
        else:
11234
          nic_ip = old_nic_ip
11235
        if nic_ip is None:
11236
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11237
                                     " on a routed nic", errors.ECODE_INVAL)
11238
      if constants.INIC_MAC in nic_dict:
11239
        nic_mac = nic_dict[constants.INIC_MAC]
11240
        if nic_mac is None:
11241
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11242
                                     errors.ECODE_INVAL)
11243
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11244
          # otherwise generate the mac
11245
          nic_dict[constants.INIC_MAC] = \
11246
            self.cfg.GenerateMAC(self.proc.GetECId())
11247
        else:
11248
          # or validate/reserve the current one
11249
          try:
11250
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11251
          except errors.ReservationError:
11252
            raise errors.OpPrereqError("MAC address %s already in use"
11253
                                       " in cluster" % nic_mac,
11254
                                       errors.ECODE_NOTUNIQUE)
11255

    
11256
    # DISK processing
11257
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11258
      raise errors.OpPrereqError("Disk operations not supported for"
11259
                                 " diskless instances",
11260
                                 errors.ECODE_INVAL)
11261
    for disk_op, _ in self.op.disks:
11262
      if disk_op == constants.DDM_REMOVE:
11263
        if len(instance.disks) == 1:
11264
          raise errors.OpPrereqError("Cannot remove the last disk of"
11265
                                     " an instance", errors.ECODE_INVAL)
11266
        _CheckInstanceDown(self, instance, "cannot remove disks")
11267

    
11268
      if (disk_op == constants.DDM_ADD and
11269
          len(instance.disks) >= constants.MAX_DISKS):
11270
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11271
                                   " add more" % constants.MAX_DISKS,
11272
                                   errors.ECODE_STATE)
11273
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11274
        # an existing disk
11275
        if disk_op < 0 or disk_op >= len(instance.disks):
11276
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11277
                                     " are 0 to %d" %
11278
                                     (disk_op, len(instance.disks) - 1),
11279
                                     errors.ECODE_INVAL)
11280

    
11281
    return
11282

    
11283
  def _ConvertPlainToDrbd(self, feedback_fn):
11284
    """Converts an instance from plain to drbd.
11285

11286
    """
11287
    feedback_fn("Converting template to drbd")
11288
    instance = self.instance
11289
    pnode = instance.primary_node
11290
    snode = self.op.remote_node
11291

    
11292
    # create a fake disk info for _GenerateDiskTemplate
11293
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11294
                  constants.IDISK_VG: d.logical_id[0]}
11295
                 for d in instance.disks]
11296
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11297
                                      instance.name, pnode, [snode],
11298
                                      disk_info, None, None, 0, feedback_fn)
11299
    info = _GetInstanceInfoText(instance)
11300
    feedback_fn("Creating aditional volumes...")
11301
    # first, create the missing data and meta devices
11302
    for disk in new_disks:
11303
      # unfortunately this is... not too nice
11304
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11305
                            info, True)
11306
      for child in disk.children:
11307
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11308
    # at this stage, all new LVs have been created, we can rename the
11309
    # old ones
11310
    feedback_fn("Renaming original volumes...")
11311
    rename_list = [(o, n.children[0].logical_id)
11312
                   for (o, n) in zip(instance.disks, new_disks)]
11313
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11314
    result.Raise("Failed to rename original LVs")
11315

    
11316
    feedback_fn("Initializing DRBD devices...")
11317
    # all child devices are in place, we can now create the DRBD devices
11318
    for disk in new_disks:
11319
      for node in [pnode, snode]:
11320
        f_create = node == pnode
11321
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11322

    
11323
    # at this point, the instance has been modified
11324
    instance.disk_template = constants.DT_DRBD8
11325
    instance.disks = new_disks
11326
    self.cfg.Update(instance, feedback_fn)
11327

    
11328
    # disks are created, waiting for sync
11329
    disk_abort = not _WaitForSync(self, instance,
11330
                                  oneshot=not self.op.wait_for_sync)
11331
    if disk_abort:
11332
      raise errors.OpExecError("There are some degraded disks for"
11333
                               " this instance, please cleanup manually")
11334

    
11335
  def _ConvertDrbdToPlain(self, feedback_fn):
11336
    """Converts an instance from drbd to plain.
11337

11338
    """
11339
    instance = self.instance
11340
    assert len(instance.secondary_nodes) == 1
11341
    pnode = instance.primary_node
11342
    snode = instance.secondary_nodes[0]
11343
    feedback_fn("Converting template to plain")
11344

    
11345
    old_disks = instance.disks
11346
    new_disks = [d.children[0] for d in old_disks]
11347

    
11348
    # copy over size and mode
11349
    for parent, child in zip(old_disks, new_disks):
11350
      child.size = parent.size
11351
      child.mode = parent.mode
11352

    
11353
    # update instance structure
11354
    instance.disks = new_disks
11355
    instance.disk_template = constants.DT_PLAIN
11356
    self.cfg.Update(instance, feedback_fn)
11357

    
11358
    feedback_fn("Removing volumes on the secondary node...")
11359
    for disk in old_disks:
11360
      self.cfg.SetDiskID(disk, snode)
11361
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11362
      if msg:
11363
        self.LogWarning("Could not remove block device %s on node %s,"
11364
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11365

    
11366
    feedback_fn("Removing unneeded volumes on the primary node...")
11367
    for idx, disk in enumerate(old_disks):
11368
      meta = disk.children[1]
11369
      self.cfg.SetDiskID(meta, pnode)
11370
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11371
      if msg:
11372
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11373
                        " continuing anyway: %s", idx, pnode, msg)
11374

    
11375
  def Exec(self, feedback_fn):
11376
    """Modifies an instance.
11377

11378
    All parameters take effect only at the next restart of the instance.
11379

11380
    """
11381
    # Process here the warnings from CheckPrereq, as we don't have a
11382
    # feedback_fn there.
11383
    for warn in self.warn:
11384
      feedback_fn("WARNING: %s" % warn)
11385

    
11386
    result = []
11387
    instance = self.instance
11388
    # disk changes
11389
    for disk_op, disk_dict in self.op.disks:
11390
      if disk_op == constants.DDM_REMOVE:
11391
        # remove the last disk
11392
        device = instance.disks.pop()
11393
        device_idx = len(instance.disks)
11394
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11395
          self.cfg.SetDiskID(disk, node)
11396
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11397
          if msg:
11398
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11399
                            " continuing anyway", device_idx, node, msg)
11400
        result.append(("disk/%d" % device_idx, "remove"))
11401
      elif disk_op == constants.DDM_ADD:
11402
        # add a new disk
11403
        if instance.disk_template in (constants.DT_FILE,
11404
                                      constants.DT_SHARED_FILE):
11405
          file_driver, file_path = instance.disks[0].logical_id
11406
          file_path = os.path.dirname(file_path)
11407
        else:
11408
          file_driver = file_path = None
11409
        disk_idx_base = len(instance.disks)
11410
        new_disk = _GenerateDiskTemplate(self,
11411
                                         instance.disk_template,
11412
                                         instance.name, instance.primary_node,
11413
                                         instance.secondary_nodes,
11414
                                         [disk_dict],
11415
                                         file_path,
11416
                                         file_driver,
11417
                                         disk_idx_base, feedback_fn)[0]
11418
        instance.disks.append(new_disk)
11419
        info = _GetInstanceInfoText(instance)
11420

    
11421
        logging.info("Creating volume %s for instance %s",
11422
                     new_disk.iv_name, instance.name)
11423
        # Note: this needs to be kept in sync with _CreateDisks
11424
        #HARDCODE
11425
        for node in instance.all_nodes:
11426
          f_create = node == instance.primary_node
11427
          try:
11428
            _CreateBlockDev(self, node, instance, new_disk,
11429
                            f_create, info, f_create)
11430
          except errors.OpExecError, err:
11431
            self.LogWarning("Failed to create volume %s (%s) on"
11432
                            " node %s: %s",
11433
                            new_disk.iv_name, new_disk, node, err)
11434
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11435
                       (new_disk.size, new_disk.mode)))
11436
      else:
11437
        # change a given disk
11438
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11439
        result.append(("disk.mode/%d" % disk_op,
11440
                       disk_dict[constants.IDISK_MODE]))
11441

    
11442
    if self.op.disk_template:
11443
      r_shut = _ShutdownInstanceDisks(self, instance)
11444
      if not r_shut:
11445
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11446
                                 " proceed with disk template conversion")
11447
      mode = (instance.disk_template, self.op.disk_template)
11448
      try:
11449
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11450
      except:
11451
        self.cfg.ReleaseDRBDMinors(instance.name)
11452
        raise
11453
      result.append(("disk_template", self.op.disk_template))
11454

    
11455
    # NIC changes
11456
    for nic_op, nic_dict in self.op.nics:
11457
      if nic_op == constants.DDM_REMOVE:
11458
        # remove the last nic
11459
        del instance.nics[-1]
11460
        result.append(("nic.%d" % len(instance.nics), "remove"))
11461
      elif nic_op == constants.DDM_ADD:
11462
        # mac and bridge should be set by now
11463
        mac = nic_dict[constants.INIC_MAC]
11464
        ip = nic_dict.get(constants.INIC_IP, None)
11465
        nicparams = self.nic_pinst[constants.DDM_ADD]
11466
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11467
        instance.nics.append(new_nic)
11468
        result.append(("nic.%d" % (len(instance.nics) - 1),
11469
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11470
                       (new_nic.mac, new_nic.ip,
11471
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11472
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11473
                       )))
11474
      else:
11475
        for key in (constants.INIC_MAC, constants.INIC_IP):
11476
          if key in nic_dict:
11477
            setattr(instance.nics[nic_op], key, nic_dict[key])
11478
        if nic_op in self.nic_pinst:
11479
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11480
        for key, val in nic_dict.iteritems():
11481
          result.append(("nic.%s/%d" % (key, nic_op), val))
11482

    
11483
    # hvparams changes
11484
    if self.op.hvparams:
11485
      instance.hvparams = self.hv_inst
11486
      for key, val in self.op.hvparams.iteritems():
11487
        result.append(("hv/%s" % key, val))
11488

    
11489
    # beparams changes
11490
    if self.op.beparams:
11491
      instance.beparams = self.be_inst
11492
      for key, val in self.op.beparams.iteritems():
11493
        result.append(("be/%s" % key, val))
11494

    
11495
    # OS change
11496
    if self.op.os_name:
11497
      instance.os = self.op.os_name
11498

    
11499
    # osparams changes
11500
    if self.op.osparams:
11501
      instance.osparams = self.os_inst
11502
      for key, val in self.op.osparams.iteritems():
11503
        result.append(("os/%s" % key, val))
11504

    
11505
    self.cfg.Update(instance, feedback_fn)
11506

    
11507
    return result
11508

    
11509
  _DISK_CONVERSIONS = {
11510
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11511
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11512
    }
11513

    
11514

    
11515
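# Illustrative sketch, not part of the original module: Exec() above selects
# the conversion helper through the (old template, new template) tuple keys
# of _DISK_CONVERSIONS. The function below shows the same dispatch pattern in
# isolation, with hypothetical template names and handlers:
def _ExampleTemplateConversionDispatch(old_template, new_template):
  """Illustrate tuple-keyed dispatch as used with _DISK_CONVERSIONS."""
  conversions = {
    ("plain", "drbd"): lambda: "convert plain to drbd",
    ("drbd", "plain"): lambda: "convert drbd to plain",
    }
  try:
    handler = conversions[(old_template, new_template)]
  except KeyError:
    raise ValueError("Unsupported conversion %s to %s" %
                     (old_template, new_template))
  return handler()

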
class LUInstanceChangeGroup(LogicalUnit):
11516
  HPATH = "instance-change-group"
11517
  HTYPE = constants.HTYPE_INSTANCE
11518
  REQ_BGL = False
11519

    
11520
  def ExpandNames(self):
11521
    self.share_locks = _ShareAll()
11522
    self.needed_locks = {
11523
      locking.LEVEL_NODEGROUP: [],
11524
      locking.LEVEL_NODE: [],
11525
      }
11526

    
11527
    self._ExpandAndLockInstance()
11528

    
11529
    if self.op.target_groups:
11530
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11531
                                  self.op.target_groups)
11532
    else:
11533
      self.req_target_uuids = None
11534

    
11535
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11536

    
11537
  def DeclareLocks(self, level):
11538
    if level == locking.LEVEL_NODEGROUP:
11539
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11540

    
11541
      if self.req_target_uuids:
11542
        lock_groups = set(self.req_target_uuids)
11543

    
11544
        # Lock all groups used by instance optimistically; this requires going
11545
        # via the node before it's locked, requiring verification later on
11546
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11547
        lock_groups.update(instance_groups)
11548
      else:
11549
        # No target groups, need to lock all of them
11550
        lock_groups = locking.ALL_SET
11551

    
11552
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11553

    
11554
    elif level == locking.LEVEL_NODE:
11555
      if self.req_target_uuids:
11556
        # Lock all nodes used by instances
11557
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11558
        self._LockInstancesNodes()
11559

    
11560
        # Lock all nodes in all potential target groups
11561
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11562
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11563
        member_nodes = [node_name
11564
                        for group in lock_groups
11565
                        for node_name in self.cfg.GetNodeGroup(group).members]
11566
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11567
      else:
11568
        # Lock all nodes as all groups are potential targets
11569
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11570

    
11571
  def CheckPrereq(self):
11572
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11573
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11574
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11575

    
11576
    assert (self.req_target_uuids is None or
11577
            owned_groups.issuperset(self.req_target_uuids))
11578
    assert owned_instances == set([self.op.instance_name])
11579

    
11580
    # Get instance information
11581
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11582

    
11583
    # Check if node groups for locked instance are still correct
11584
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11585
      ("Instance %s's nodes changed while we kept the lock" %
11586
       self.op.instance_name)
11587

    
11588
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11589
                                           owned_groups)
11590

    
11591
    if self.req_target_uuids:
11592
      # User requested specific target groups
11593
      self.target_uuids = self.req_target_uuids
11594
    else:
11595
      # All groups except those used by the instance are potential targets
11596
      self.target_uuids = owned_groups - inst_groups
11597

    
11598
    conflicting_groups = self.target_uuids & inst_groups
11599
    if conflicting_groups:
11600
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11601
                                 " used by the instance '%s'" %
11602
                                 (utils.CommaJoin(conflicting_groups),
11603
                                  self.op.instance_name),
11604
                                 errors.ECODE_INVAL)
11605

    
11606
    if not self.target_uuids:
11607
      raise errors.OpPrereqError("There are no possible target groups",
11608
                                 errors.ECODE_INVAL)
11609

    
11610
  def BuildHooksEnv(self):
11611
    """Build hooks env.
11612

11613
    """
11614
    assert self.target_uuids
11615

    
11616
    env = {
11617
      "TARGET_GROUPS": " ".join(self.target_uuids),
11618
      }
11619

    
11620
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11621

    
11622
    return env
11623

    
11624
  def BuildHooksNodes(self):
11625
    """Build hooks nodes.
11626

11627
    """
11628
    mn = self.cfg.GetMasterNode()
11629
    return ([mn], [mn])
11630

    
11631
  def Exec(self, feedback_fn):
11632
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11633

    
11634
    assert instances == [self.op.instance_name], "Instance not locked"
11635

    
11636
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11637
                     instances=instances, target_groups=list(self.target_uuids))
11638

    
11639
    ial.Run(self.op.iallocator)
11640

    
11641
    if not ial.success:
11642
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11643
                                 " instance '%s' using iallocator '%s': %s" %
11644
                                 (self.op.instance_name, self.op.iallocator,
11645
                                  ial.info),
11646
                                 errors.ECODE_NORES)
11647

    
11648
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11649

    
11650
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11651
                 " instance '%s'", len(jobs), self.op.instance_name)
11652

    
11653
    return ResultWithJobs(jobs)
11654

    
11655

    
11656
class LUBackupQuery(NoHooksLU):
11657
  """Query the exports list
11658

11659
  """
11660
  REQ_BGL = False
11661

    
11662
  def ExpandNames(self):
11663
    self.needed_locks = {}
11664
    self.share_locks[locking.LEVEL_NODE] = 1
11665
    if not self.op.nodes:
11666
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11667
    else:
11668
      self.needed_locks[locking.LEVEL_NODE] = \
11669
        _GetWantedNodes(self, self.op.nodes)
11670

    
11671
  def Exec(self, feedback_fn):
11672
    """Compute the list of all the exported system images.
11673

11674
    @rtype: dict
11675
    @return: a dictionary with the structure node->(export-list)
11676
        where export-list is a list of the instances exported on
11677
        that node.
11678

11679
    """
11680
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11681
    rpcresult = self.rpc.call_export_list(self.nodes)
11682
    result = {}
11683
    for node in rpcresult:
11684
      if rpcresult[node].fail_msg:
11685
        result[node] = False
11686
      else:
11687
        result[node] = rpcresult[node].payload
11688

    
11689
    return result
11690

    
11691

    
11692
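# Illustrative sketch, not part of the original module: LUBackupQuery.Exec
# above returns a node -> export-list mapping in which nodes whose RPC call
# failed are mapped to False instead of a list. A hypothetical example value:
def _ExampleExportListResult():
  """Return a hypothetical LUBackupQuery result."""
  return {
    "node1.example.com": ["inst1.example.com", "inst2.example.com"],
    "node2.example.com": False,  # the export_list RPC failed on this node
    }

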
class LUBackupPrepare(NoHooksLU):
11693
  """Prepares an instance for an export and returns useful information.
11694

11695
  """
11696
  REQ_BGL = False
11697

    
11698
  def ExpandNames(self):
11699
    self._ExpandAndLockInstance()
11700

    
11701
  def CheckPrereq(self):
11702
    """Check prerequisites.
11703

11704
    """
11705
    instance_name = self.op.instance_name
11706

    
11707
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11708
    assert self.instance is not None, \
11709
          "Cannot retrieve locked instance %s" % self.op.instance_name
11710
    _CheckNodeOnline(self, self.instance.primary_node)
11711

    
11712
    self._cds = _GetClusterDomainSecret()
11713

    
11714
  def Exec(self, feedback_fn):
11715
    """Prepares an instance for an export.
11716

11717
    """
11718
    instance = self.instance
11719

    
11720
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11721
      salt = utils.GenerateSecret(8)
11722

    
11723
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11724
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11725
                                              constants.RIE_CERT_VALIDITY)
11726
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11727

    
11728
      (name, cert_pem) = result.payload
11729

    
11730
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11731
                                             cert_pem)
11732

    
11733
      return {
11734
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11735
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11736
                          salt),
11737
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11738
        }
11739

    
11740
    return None
11741

    
11742

    
11743
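# Illustrative sketch, not part of the original module: for remote-mode
# exports, LUBackupPrepare.Exec above returns the handshake material under
# the three keys shown there; for local exports it returns None. The values
# below are purely hypothetical placeholders for the real signed data:
def _ExampleBackupPrepareRemoteResult():
  """Return a hypothetical remote-mode LUBackupPrepare result."""
  return {
    "handshake": "<handshake computed from the cluster domain secret>",
    "x509_key_name": ("<key name>", "<HMAC of the key name>", "<salt>"),
    "x509_ca": "<signed X509 CA certificate, PEM encoded>",
    }

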
class LUBackupExport(LogicalUnit):
11744
  """Export an instance to an image in the cluster.
11745

11746
  """
11747
  HPATH = "instance-export"
11748
  HTYPE = constants.HTYPE_INSTANCE
11749
  REQ_BGL = False
11750

    
11751
  def CheckArguments(self):
11752
    """Check the arguments.
11753

11754
    """
11755
    self.x509_key_name = self.op.x509_key_name
11756
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11757

    
11758
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11759
      if not self.x509_key_name:
11760
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11761
                                   errors.ECODE_INVAL)
11762

    
11763
      if not self.dest_x509_ca_pem:
11764
        raise errors.OpPrereqError("Missing destination X509 CA",
11765
                                   errors.ECODE_INVAL)
11766

    
11767
  def ExpandNames(self):
11768
    self._ExpandAndLockInstance()
11769

    
11770
    # Lock all nodes for local exports
11771
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11772
      # FIXME: lock only instance primary and destination node
11773
      #
11774
      # Sad but true, for now we have to lock all nodes, as we don't know where
11775
      # the previous export might be, and in this LU we search for it and
11776
      # remove it from its current node. In the future we could fix this by:
11777
      #  - making a tasklet to search (share-lock all), then create the
11778
      #    new one, then one to remove, after
11779
      #  - removing the removal operation altogether
11780
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11781

    
11782
  def DeclareLocks(self, level):
11783
    """Last minute lock declaration."""
11784
    # All nodes are locked anyway, so nothing to do here.
11785

    
11786
  def BuildHooksEnv(self):
11787
    """Build hooks env.
11788

11789
    This will run on the master, primary node and target node.
11790

11791
    """
11792
    env = {
11793
      "EXPORT_MODE": self.op.mode,
11794
      "EXPORT_NODE": self.op.target_node,
11795
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11796
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11797
      # TODO: Generic function for boolean env variables
11798
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11799
      }
11800

    
11801
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11802

    
11803
    return env
11804

    
11805
  def BuildHooksNodes(self):
11806
    """Build hooks nodes.
11807

11808
    """
11809
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11810

    
11811
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11812
      nl.append(self.op.target_node)
11813

    
11814
    return (nl, nl)
11815

    
11816
  def CheckPrereq(self):
11817
    """Check prerequisites.
11818

11819
    This checks that the instance and node names are valid.
11820

11821
    """
11822
    instance_name = self.op.instance_name
11823

    
11824
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11825
    assert self.instance is not None, \
11826
          "Cannot retrieve locked instance %s" % self.op.instance_name
11827
    _CheckNodeOnline(self, self.instance.primary_node)
11828

    
11829
    if (self.op.remove_instance and self.instance.admin_up and
11830
        not self.op.shutdown):
11831
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11832
                                 " down before")
11833

    
11834
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11835
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11836
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11837
      assert self.dst_node is not None
11838

    
11839
      _CheckNodeOnline(self, self.dst_node.name)
11840
      _CheckNodeNotDrained(self, self.dst_node.name)
11841

    
11842
      self._cds = None
11843
      self.dest_disk_info = None
11844
      self.dest_x509_ca = None
11845

    
11846
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11847
      self.dst_node = None
11848

    
11849
      if len(self.op.target_node) != len(self.instance.disks):
11850
        raise errors.OpPrereqError(("Received destination information for %s"
11851
                                    " disks, but instance %s has %s disks") %
11852
                                   (len(self.op.target_node), instance_name,
11853
                                    len(self.instance.disks)),
11854
                                   errors.ECODE_INVAL)
11855

    
11856
      cds = _GetClusterDomainSecret()
11857

    
11858
      # Check X509 key name
11859
      try:
11860
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11861
      except (TypeError, ValueError), err:
11862
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11863

    
11864
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11865
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11866
                                   errors.ECODE_INVAL)
11867

    
11868
      # Load and verify CA
11869
      try:
11870
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11871
      except OpenSSL.crypto.Error, err:
11872
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11873
                                   (err, ), errors.ECODE_INVAL)
11874

    
11875
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11876
      if errcode is not None:
11877
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11878
                                   (msg, ), errors.ECODE_INVAL)
11879

    
11880
      self.dest_x509_ca = cert
11881

    
11882
      # Verify target information
11883
      disk_info = []
11884
      for idx, disk_data in enumerate(self.op.target_node):
11885
        try:
11886
          (host, port, magic) = \
11887
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11888
        except errors.GenericError, err:
11889
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11890
                                     (idx, err), errors.ECODE_INVAL)
11891

    
11892
        disk_info.append((host, port, magic))
11893

    
11894
      assert len(disk_info) == len(self.op.target_node)
11895
      self.dest_disk_info = disk_info
11896

    
11897
    else:
11898
      raise errors.ProgrammerError("Unhandled export mode %r" %
11899
                                   self.op.mode)
11900

    
11901
    # instance disk type verification
11902
    # TODO: Implement export support for file-based disks
11903
    for disk in self.instance.disks:
11904
      if disk.dev_type == constants.LD_FILE:
11905
        raise errors.OpPrereqError("Export not supported for instances with"
11906
                                   " file-based disks", errors.ECODE_INVAL)
11907

    
11908
  def _CleanupExports(self, feedback_fn):
11909
    """Removes exports of current instance from all other nodes.
11910

11911
    If an instance in a cluster with nodes A..D was exported to node C, its
11912
    exports will be removed from the nodes A, B and D.
11913

11914
    """
11915
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11916

    
11917
    nodelist = self.cfg.GetNodeList()
11918
    nodelist.remove(self.dst_node.name)
11919

    
11920
    # on one-node clusters nodelist will be empty after the removal
11921
    # if we proceed the backup would be removed because OpBackupQuery
11922
    # substitutes an empty list with the full cluster node list.
11923
    iname = self.instance.name
11924
    if nodelist:
11925
      feedback_fn("Removing old exports for instance %s" % iname)
11926
      exportlist = self.rpc.call_export_list(nodelist)
11927
      for node in exportlist:
11928
        if exportlist[node].fail_msg:
11929
          continue
11930
        if iname in exportlist[node].payload:
11931
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11932
          if msg:
11933
            self.LogWarning("Could not remove older export for instance %s"
11934
                            " on node %s: %s", iname, node, msg)
11935

    
11936
  def Exec(self, feedback_fn):
11937
    """Export an instance to an image in the cluster.
11938

11939
    """
11940
    assert self.op.mode in constants.EXPORT_MODES
11941

    
11942
    instance = self.instance
11943
    src_node = instance.primary_node
11944

    
11945
    if self.op.shutdown:
11946
      # shutdown the instance, but not the disks
11947
      feedback_fn("Shutting down instance %s" % instance.name)
11948
      result = self.rpc.call_instance_shutdown(src_node, instance,
11949
                                               self.op.shutdown_timeout)
11950
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11951
      result.Raise("Could not shutdown instance %s on"
11952
                   " node %s" % (instance.name, src_node))
11953

    
11954
    # set the disks ID correctly since call_instance_start needs the
11955
    # correct drbd minor to create the symlinks
11956
    for disk in instance.disks:
11957
      self.cfg.SetDiskID(disk, src_node)
11958

    
11959
    activate_disks = (not instance.admin_up)
11960

    
11961
    if activate_disks:
11962
      # Activate the instance disks if we're exporting a stopped instance
11963
      feedback_fn("Activating disks for %s" % instance.name)
11964
      _StartInstanceDisks(self, instance, None)
11965

    
11966
    try:
11967
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11968
                                                     instance)
11969

    
11970
      helper.CreateSnapshots()
11971
      try:
11972
        if (self.op.shutdown and instance.admin_up and
11973
            not self.op.remove_instance):
11974
          assert not activate_disks
11975
          feedback_fn("Starting instance %s" % instance.name)
11976
          result = self.rpc.call_instance_start(src_node,
11977
                                                (instance, None, None), False)
11978
          msg = result.fail_msg
11979
          if msg:
11980
            feedback_fn("Failed to start instance: %s" % msg)
11981
            _ShutdownInstanceDisks(self, instance)
11982
            raise errors.OpExecError("Could not start instance: %s" % msg)
11983

    
11984
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11985
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11986
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11987
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11988
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11989

    
11990
          (key_name, _, _) = self.x509_key_name
11991

    
11992
          dest_ca_pem = \
11993
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11994
                                            self.dest_x509_ca)
11995

    
11996
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11997
                                                     key_name, dest_ca_pem,
11998
                                                     timeouts)
11999
      finally:
12000
        helper.Cleanup()
12001

    
12002
      # Check for backwards compatibility
12003
      assert len(dresults) == len(instance.disks)
12004
      assert compat.all(isinstance(i, bool) for i in dresults), \
12005
             "Not all results are boolean: %r" % dresults
12006

    
12007
    finally:
12008
      if activate_disks:
12009
        feedback_fn("Deactivating disks for %s" % instance.name)
12010
        _ShutdownInstanceDisks(self, instance)
12011

    
12012
    if not (compat.all(dresults) and fin_resu):
12013
      failures = []
12014
      if not fin_resu:
12015
        failures.append("export finalization")
12016
      if not compat.all(dresults):
12017
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12018
                               if not dsk)
12019
        failures.append("disk export: disk(s) %s" % fdsk)
12020

    
12021
      raise errors.OpExecError("Export failed, errors in %s" %
12022
                               utils.CommaJoin(failures))
12023

    
12024
    # At this point, the export was successful, we can cleanup/finish
12025

    
12026
    # Remove instance if requested
12027
    if self.op.remove_instance:
12028
      feedback_fn("Removing instance %s" % instance.name)
12029
      _RemoveInstance(self, feedback_fn, instance,
12030
                      self.op.ignore_remove_failures)
12031

    
12032
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12033
      self._CleanupExports(feedback_fn)
12034

    
12035
    return fin_resu, dresults
12036

    
12037

    
12038
class LUBackupRemove(NoHooksLU):
12039
  """Remove exports related to the named instance.
12040

12041
  """
12042
  REQ_BGL = False
12043

    
12044
  def ExpandNames(self):
12045
    self.needed_locks = {}
12046
    # We need all nodes to be locked in order for RemoveExport to work, but we
12047
    # don't need to lock the instance itself, as nothing will happen to it (and
12048
    # we can remove exports also for a removed instance)
12049
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12050

    
12051
  def Exec(self, feedback_fn):
12052
    """Remove any export.
12053

12054
    """
12055
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12056
    # If the instance was not found we'll try with the name that was passed in.
12057
    # This will only work if it was an FQDN, though.
12058
    fqdn_warn = False
12059
    if not instance_name:
12060
      fqdn_warn = True
12061
      instance_name = self.op.instance_name
12062

    
12063
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12064
    exportlist = self.rpc.call_export_list(locked_nodes)
12065
    found = False
12066
    for node in exportlist:
12067
      msg = exportlist[node].fail_msg
12068
      if msg:
12069
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12070
        continue
12071
      if instance_name in exportlist[node].payload:
12072
        found = True
12073
        result = self.rpc.call_export_remove(node, instance_name)
12074
        msg = result.fail_msg
12075
        if msg:
12076
          logging.error("Could not remove export for instance %s"
12077
                        " on node %s: %s", instance_name, node, msg)
12078

    
12079
    if fqdn_warn and not found:
12080
      feedback_fn("Export not found. If trying to remove an export belonging"
12081
                  " to a deleted instance please use its Fully Qualified"
12082
                  " Domain Name.")
12083

    
12084

    
12085
class LUGroupAdd(LogicalUnit):
12086
  """Logical unit for creating node groups.
12087

12088
  """
12089
  HPATH = "group-add"
12090
  HTYPE = constants.HTYPE_GROUP
12091
  REQ_BGL = False
12092

    
12093
  def ExpandNames(self):
12094
    # We need the new group's UUID here so that we can create and acquire the
12095
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12096
    # that it should not check whether the UUID exists in the configuration.
12097
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12098
    self.needed_locks = {}
12099
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12100

    
12101
  def CheckPrereq(self):
12102
    """Check prerequisites.
12103

12104
    This checks that the given group name is not an existing node group
12105
    already.
12106

12107
    """
12108
    try:
12109
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12110
    except errors.OpPrereqError:
12111
      pass
12112
    else:
12113
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12114
                                 " node group (UUID: %s)" %
12115
                                 (self.op.group_name, existing_uuid),
12116
                                 errors.ECODE_EXISTS)
12117

    
12118
    if self.op.ndparams:
12119
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12120

    
12121
  def BuildHooksEnv(self):
12122
    """Build hooks env.
12123

12124
    """
12125
    return {
12126
      "GROUP_NAME": self.op.group_name,
12127
      }
12128

    
12129
  def BuildHooksNodes(self):
12130
    """Build hooks nodes.
12131

12132
    """
12133
    mn = self.cfg.GetMasterNode()
12134
    return ([mn], [mn])
12135

    
12136
  def Exec(self, feedback_fn):
12137
    """Add the node group to the cluster.
12138

12139
    """
12140
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12141
                                  uuid=self.group_uuid,
12142
                                  alloc_policy=self.op.alloc_policy,
12143
                                  ndparams=self.op.ndparams)
12144

    
12145
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12146
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12147

    
12148

    
12149
class LUGroupAssignNodes(NoHooksLU):
12150
  """Logical unit for assigning nodes to groups.
12151

12152
  """
12153
  REQ_BGL = False
12154

    
12155
  def ExpandNames(self):
12156
    # These raise errors.OpPrereqError on their own:
12157
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12158
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12159

    
12160
    # We want to lock all the affected nodes and groups. We have readily
12161
    # available the list of nodes, and the *destination* group. To gather the
12162
    # list of "source" groups, we need to fetch node information later on.
12163
    self.needed_locks = {
12164
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12165
      locking.LEVEL_NODE: self.op.nodes,
12166
      }
12167

    
12168
  def DeclareLocks(self, level):
12169
    if level == locking.LEVEL_NODEGROUP:
12170
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12171

    
12172
      # Try to get all affected nodes' groups without having the group or node
12173
      # lock yet. Needs verification later in the code flow.
12174
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12175

    
12176
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12177

    
12178
  def CheckPrereq(self):
12179
    """Check prerequisites.
12180

12181
    """
12182
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12183
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12184
            frozenset(self.op.nodes))
12185

    
12186
    expected_locks = (set([self.group_uuid]) |
12187
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12188
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12189
    if actual_locks != expected_locks:
12190
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12191
                               " current groups are '%s', used to be '%s'" %
12192
                               (utils.CommaJoin(expected_locks),
12193
                                utils.CommaJoin(actual_locks)))
12194

    
12195
    self.node_data = self.cfg.GetAllNodesInfo()
12196
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12197
    instance_data = self.cfg.GetAllInstancesInfo()
12198

    
12199
    if self.group is None:
12200
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12201
                               (self.op.group_name, self.group_uuid))
12202

    
12203
    (new_splits, previous_splits) = \
12204
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12205
                                             for node in self.op.nodes],
12206
                                            self.node_data, instance_data)
12207

    
12208
    if new_splits:
12209
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12210

    
12211
      if not self.op.force:
12212
        raise errors.OpExecError("The following instances get split by this"
12213
                                 " change and --force was not given: %s" %
12214
                                 fmt_new_splits)
12215
      else:
12216
        self.LogWarning("This operation will split the following instances: %s",
12217
                        fmt_new_splits)
12218

    
12219
        if previous_splits:
12220
          self.LogWarning("In addition, these already-split instances continue"
12221
                          " to be split across groups: %s",
12222
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12223

    
12224
  def Exec(self, feedback_fn):
12225
    """Assign nodes to a new group.
12226

12227
    """
12228
    for node in self.op.nodes:
12229
      self.node_data[node].group = self.group_uuid
12230

    
12231
    # FIXME: Depends on side-effects of modifying the result of
12232
    # C{cfg.GetAllNodesInfo}
12233

    
12234
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12235

    
12236
  @staticmethod
12237
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12238
    """Check for split instances after a node assignment.
12239

12240
    This method considers a series of node assignments as an atomic operation,
12241
    and returns information about split instances after applying the set of
12242
    changes.
12243

12244
    In particular, it returns information about newly split instances, and
12245
    instances that were already split, and remain so after the change.
12246

12247
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12248
    considered.
12249

12250
    @type changes: list of (node_name, new_group_uuid) pairs.
12251
    @param changes: list of node assignments to consider.
12252
    @param node_data: a dict with data for all nodes
12253
    @param instance_data: a dict with all instances to consider
12254
    @rtype: a two-tuple
12255
    @return: a list of instances that were previously okay and become split as a
12256
      consequence of this change, and a list of instances that were previously
12257
      split and that this change does not fix.
12258

12259
    """
12260
    changed_nodes = dict((node, group) for node, group in changes
12261
                         if node_data[node].group != group)
12262

    
12263
    all_split_instances = set()
12264
    previously_split_instances = set()
12265

    
12266
    def InstanceNodes(instance):
12267
      return [instance.primary_node] + list(instance.secondary_nodes)
12268

    
12269
    for inst in instance_data.values():
12270
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12271
        continue
12272

    
12273
      instance_nodes = InstanceNodes(inst)
12274

    
12275
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12276
        previously_split_instances.add(inst.name)
12277

    
12278
      if len(set(changed_nodes.get(node, node_data[node].group)
12279
                 for node in instance_nodes)) > 1:
12280
        all_split_instances.add(inst.name)
12281

    
12282
    return (list(all_split_instances - previously_split_instances),
12283
            list(previously_split_instances & all_split_instances))
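  # Illustrative sketch (not part of the original code): with a hypothetical
  # DRBD instance "inst1" on primary "node1" and secondary "node2", both
  # currently in group "g1", moving only "node2" to group "g2" would report
  # the instance as newly split:
  #
  #   new, old = LUGroupAssignNodes.CheckAssignmentForSplitInstances(
  #       [("node2", "g2")], node_data, instance_data)
  #   # new == ["inst1"], old == []
  #
  # All names above are made-up placeholders.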
12284

    
12285

    
12286
class _GroupQuery(_QueryBase):
12287
  FIELDS = query.GROUP_FIELDS
12288

    
12289
  def ExpandNames(self, lu):
12290
    lu.needed_locks = {}
12291

    
12292
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12293
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12294

    
12295
    if not self.names:
12296
      self.wanted = [name_to_uuid[name]
12297
                     for name in utils.NiceSort(name_to_uuid.keys())]
12298
    else:
12299
      # Accept the given names as either group names or UUIDs.
12300
      missing = []
12301
      self.wanted = []
12302
      all_uuid = frozenset(self._all_groups.keys())
12303

    
12304
      for name in self.names:
12305
        if name in all_uuid:
12306
          self.wanted.append(name)
12307
        elif name in name_to_uuid:
12308
          self.wanted.append(name_to_uuid[name])
12309
        else:
12310
          missing.append(name)
12311

    
12312
      if missing:
12313
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12314
                                   utils.CommaJoin(missing),
12315
                                   errors.ECODE_NOENT)
12316

    
12317
  def DeclareLocks(self, lu, level):
12318
    pass
12319

    
12320
  def _GetQueryData(self, lu):
12321
    """Computes the list of node groups and their attributes.
12322

12323
    """
12324
    do_nodes = query.GQ_NODE in self.requested_data
12325
    do_instances = query.GQ_INST in self.requested_data
12326

    
12327
    group_to_nodes = None
12328
    group_to_instances = None
12329

    
12330
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12331
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12332
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12333
    # instance->node. Hence, we will need to process nodes even if we only need
12334
    # instance information.
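    # Shape sketch (purely illustrative, names are placeholders): for a group
    # with UUID "g-uuid" owning node "node1" which hosts instance "inst1":
    #   group_to_nodes     == {"g-uuid": ["node1"]}
    #   group_to_instances == {"g-uuid": ["inst1"]}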
12335
    if do_nodes or do_instances:
12336
      all_nodes = lu.cfg.GetAllNodesInfo()
12337
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12338
      node_to_group = {}
12339

    
12340
      for node in all_nodes.values():
12341
        if node.group in group_to_nodes:
12342
          group_to_nodes[node.group].append(node.name)
12343
          node_to_group[node.name] = node.group
12344

    
12345
      if do_instances:
12346
        all_instances = lu.cfg.GetAllInstancesInfo()
12347
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12348

    
12349
        for instance in all_instances.values():
12350
          node = instance.primary_node
12351
          if node in node_to_group:
12352
            group_to_instances[node_to_group[node]].append(instance.name)
12353

    
12354
        if not do_nodes:
12355
          # Do not pass on node information if it was not requested.
12356
          group_to_nodes = None
12357

    
12358
    return query.GroupQueryData([self._all_groups[uuid]
12359
                                 for uuid in self.wanted],
12360
                                group_to_nodes, group_to_instances)
12361

    
12362

    
12363
class LUGroupQuery(NoHooksLU):
12364
  """Logical unit for querying node groups.
12365

12366
  """
12367
  REQ_BGL = False
12368

    
12369
  def CheckArguments(self):
12370
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12371
                          self.op.output_fields, False)
12372

    
12373
  def ExpandNames(self):
12374
    self.gq.ExpandNames(self)
12375

    
12376
  def DeclareLocks(self, level):
12377
    self.gq.DeclareLocks(self, level)
12378

    
12379
  def Exec(self, feedback_fn):
12380
    return self.gq.OldStyleQuery(self)
12381

    
12382

    
12383
class LUGroupSetParams(LogicalUnit):
12384
  """Modifies the parameters of a node group.
12385

12386
  """
12387
  HPATH = "group-modify"
12388
  HTYPE = constants.HTYPE_GROUP
12389
  REQ_BGL = False
12390

    
12391
  def CheckArguments(self):
12392
    all_changes = [
12393
      self.op.ndparams,
12394
      self.op.alloc_policy,
12395
      ]
12396

    
12397
    if all_changes.count(None) == len(all_changes):
12398
      raise errors.OpPrereqError("Please pass at least one modification",
12399
                                 errors.ECODE_INVAL)
12400

    
12401
  def ExpandNames(self):
12402
    # This raises errors.OpPrereqError on its own:
12403
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12404

    
12405
    self.needed_locks = {
12406
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12407
      }
12408

    
12409
  def CheckPrereq(self):
12410
    """Check prerequisites.
12411

12412
    """
12413
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12414

    
12415
    if self.group is None:
12416
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12417
                               (self.op.group_name, self.group_uuid))
12418

    
12419
    if self.op.ndparams:
12420
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12421
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12422
      self.new_ndparams = new_ndparams
12423

    
12424
  def BuildHooksEnv(self):
12425
    """Build hooks env.
12426

12427
    """
12428
    return {
12429
      "GROUP_NAME": self.op.group_name,
12430
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12431
      }
12432

    
12433
  def BuildHooksNodes(self):
12434
    """Build hooks nodes.
12435

12436
    """
12437
    mn = self.cfg.GetMasterNode()
12438
    return ([mn], [mn])
12439

    
12440
  def Exec(self, feedback_fn):
12441
    """Modifies the node group.
12442

12443
    """
12444
    result = []
12445

    
12446
    if self.op.ndparams:
12447
      self.group.ndparams = self.new_ndparams
12448
      result.append(("ndparams", str(self.group.ndparams)))
12449

    
12450
    if self.op.alloc_policy:
12451
      self.group.alloc_policy = self.op.alloc_policy
12452

    
12453
    self.cfg.Update(self.group, feedback_fn)
12454
    return result
12455

    
12456

    
12457
class LUGroupRemove(LogicalUnit):
12458
  HPATH = "group-remove"
12459
  HTYPE = constants.HTYPE_GROUP
12460
  REQ_BGL = False
12461

    
12462
  def ExpandNames(self):
12463
    # This raises errors.OpPrereqError on its own:
12464
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12465
    self.needed_locks = {
12466
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12467
      }
12468

    
12469
  def CheckPrereq(self):
12470
    """Check prerequisites.
12471

12472
    This checks that the given group name exists as a node group, that it is
12473
    empty (i.e., contains no nodes), and that it is not the last group of the
12474
    cluster.
12475

12476
    """
12477
    # Verify that the group is empty.
12478
    group_nodes = [node.name
12479
                   for node in self.cfg.GetAllNodesInfo().values()
12480
                   if node.group == self.group_uuid]
12481

    
12482
    if group_nodes:
12483
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12484
                                 " nodes: %s" %
12485
                                 (self.op.group_name,
12486
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12487
                                 errors.ECODE_STATE)
12488

    
12489
    # Verify the cluster would not be left group-less.
12490
    if len(self.cfg.GetNodeGroupList()) == 1:
12491
      raise errors.OpPrereqError("Group '%s' is the only group,"
12492
                                 " cannot be removed" %
12493
                                 self.op.group_name,
12494
                                 errors.ECODE_STATE)
12495

    
12496
  def BuildHooksEnv(self):
12497
    """Build hooks env.
12498

12499
    """
12500
    return {
12501
      "GROUP_NAME": self.op.group_name,
12502
      }
12503

    
12504
  def BuildHooksNodes(self):
12505
    """Build hooks nodes.
12506

12507
    """
12508
    mn = self.cfg.GetMasterNode()
12509
    return ([mn], [mn])
12510

    
12511
  def Exec(self, feedback_fn):
12512
    """Remove the node group.
12513

12514
    """
12515
    try:
12516
      self.cfg.RemoveNodeGroup(self.group_uuid)
12517
    except errors.ConfigurationError:
12518
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12519
                               (self.op.group_name, self.group_uuid))
12520

    
12521
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12522

    
12523

    
12524
class LUGroupRename(LogicalUnit):
12525
  HPATH = "group-rename"
12526
  HTYPE = constants.HTYPE_GROUP
12527
  REQ_BGL = False
12528

    
12529
  def ExpandNames(self):
12530
    # This raises errors.OpPrereqError on its own:
12531
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12532

    
12533
    self.needed_locks = {
12534
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12535
      }
12536

    
12537
  def CheckPrereq(self):
12538
    """Check prerequisites.
12539

12540
    Ensures requested new name is not yet used.
12541

12542
    """
12543
    try:
12544
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12545
    except errors.OpPrereqError:
12546
      pass
12547
    else:
12548
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12549
                                 " node group (UUID: %s)" %
12550
                                 (self.op.new_name, new_name_uuid),
12551
                                 errors.ECODE_EXISTS)
12552

    
12553
  def BuildHooksEnv(self):
12554
    """Build hooks env.
12555

12556
    """
12557
    return {
12558
      "OLD_NAME": self.op.group_name,
12559
      "NEW_NAME": self.op.new_name,
12560
      }
12561

    
12562
  def BuildHooksNodes(self):
12563
    """Build hooks nodes.
12564

12565
    """
12566
    mn = self.cfg.GetMasterNode()
12567

    
12568
    all_nodes = self.cfg.GetAllNodesInfo()
12569
    all_nodes.pop(mn, None)
12570

    
12571
    run_nodes = [mn]
12572
    run_nodes.extend(node.name for node in all_nodes.values()
12573
                     if node.group == self.group_uuid)
12574

    
12575
    return (run_nodes, run_nodes)
12576

    
12577
  def Exec(self, feedback_fn):
12578
    """Rename the node group.
12579

12580
    """
12581
    group = self.cfg.GetNodeGroup(self.group_uuid)
12582

    
12583
    if group is None:
12584
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12585
                               (self.op.group_name, self.group_uuid))
12586

    
12587
    group.name = self.op.new_name
12588
    self.cfg.Update(group, feedback_fn)
12589

    
12590
    return self.op.new_name
12591

    
12592

    
12593
class LUGroupEvacuate(LogicalUnit):
12594
  HPATH = "group-evacuate"
12595
  HTYPE = constants.HTYPE_GROUP
12596
  REQ_BGL = False
12597

    
12598
  def ExpandNames(self):
12599
    # This raises errors.OpPrereqError on its own:
12600
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12601

    
12602
    if self.op.target_groups:
12603
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12604
                                  self.op.target_groups)
12605
    else:
12606
      self.req_target_uuids = []
12607

    
12608
    if self.group_uuid in self.req_target_uuids:
12609
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12610
                                 " as a target group (targets are %s)" %
12611
                                 (self.group_uuid,
12612
                                  utils.CommaJoin(self.req_target_uuids)),
12613
                                 errors.ECODE_INVAL)
12614

    
12615
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12616

    
12617
    self.share_locks = _ShareAll()
12618
    self.needed_locks = {
12619
      locking.LEVEL_INSTANCE: [],
12620
      locking.LEVEL_NODEGROUP: [],
12621
      locking.LEVEL_NODE: [],
12622
      }
12623

    
12624
  def DeclareLocks(self, level):
12625
    if level == locking.LEVEL_INSTANCE:
12626
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12627

    
12628
      # Lock instances optimistically, needs verification once node and group
12629
      # locks have been acquired
12630
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12631
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12632

    
12633
    elif level == locking.LEVEL_NODEGROUP:
12634
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12635

    
12636
      if self.req_target_uuids:
12637
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12638

    
12639
        # Lock all groups used by instances optimistically; this requires going
12640
        # via the node before it's locked, requiring verification later on
12641
        lock_groups.update(group_uuid
12642
                           for instance_name in
12643
                             self.owned_locks(locking.LEVEL_INSTANCE)
12644
                           for group_uuid in
12645
                             self.cfg.GetInstanceNodeGroups(instance_name))
12646
      else:
12647
        # No target groups, need to lock all of them
12648
        lock_groups = locking.ALL_SET
12649

    
12650
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12651

    
12652
    elif level == locking.LEVEL_NODE:
12653
      # This will only lock the nodes in the group to be evacuated which
12654
      # contain actual instances
12655
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12656
      self._LockInstancesNodes()
12657

    
12658
      # Lock all nodes in group to be evacuated and target groups
12659
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12660
      assert self.group_uuid in owned_groups
12661
      member_nodes = [node_name
12662
                      for group in owned_groups
12663
                      for node_name in self.cfg.GetNodeGroup(group).members]
12664
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12665

    
12666
  def CheckPrereq(self):
12667
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12668
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12669
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12670

    
12671
    assert owned_groups.issuperset(self.req_target_uuids)
12672
    assert self.group_uuid in owned_groups
12673

    
12674
    # Check if locked instances are still correct
12675
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12676

    
12677
    # Get instance information
12678
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12679

    
12680
    # Check if node groups for locked instances are still correct
12681
    for instance_name in owned_instances:
12682
      inst = self.instances[instance_name]
12683
      assert owned_nodes.issuperset(inst.all_nodes), \
12684
        "Instance %s's nodes changed while we kept the lock" % instance_name
12685

    
12686
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12687
                                             owned_groups)
12688

    
12689
      assert self.group_uuid in inst_groups, \
12690
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12691

    
12692
    if self.req_target_uuids:
12693
      # User requested specific target groups
12694
      self.target_uuids = self.req_target_uuids
12695
    else:
12696
      # All groups except the one to be evacuated are potential targets
12697
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12698
                           if group_uuid != self.group_uuid]
12699

    
12700
      if not self.target_uuids:
12701
        raise errors.OpPrereqError("There are no possible target groups",
12702
                                   errors.ECODE_INVAL)
12703

    
12704
  def BuildHooksEnv(self):
12705
    """Build hooks env.
12706

12707
    """
12708
    return {
12709
      "GROUP_NAME": self.op.group_name,
12710
      "TARGET_GROUPS": " ".join(self.target_uuids),
12711
      }
12712

    
12713
  def BuildHooksNodes(self):
12714
    """Build hooks nodes.
12715

12716
    """
12717
    mn = self.cfg.GetMasterNode()
12718

    
12719
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12720

    
12721
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12722

    
12723
    return (run_nodes, run_nodes)
12724

    
12725
  def Exec(self, feedback_fn):
12726
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12727

    
12728
    assert self.group_uuid not in self.target_uuids
12729

    
12730
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12731
                     instances=instances, target_groups=self.target_uuids)
12732

    
12733
    ial.Run(self.op.iallocator)
12734

    
12735
    if not ial.success:
12736
      raise errors.OpPrereqError("Can't compute group evacuation using"
12737
                                 " iallocator '%s': %s" %
12738
                                 (self.op.iallocator, ial.info),
12739
                                 errors.ECODE_NORES)
12740

    
12741
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12742

    
12743
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12744
                 len(jobs), self.op.group_name)
12745

    
12746
    return ResultWithJobs(jobs)
12747

    
12748

    
12749
class TagsLU(NoHooksLU): # pylint: disable=W0223
12750
  """Generic tags LU.
12751

12752
  This is an abstract class which is the parent of all the other tags LUs.
12753

12754
  """
12755
  def ExpandNames(self):
12756
    self.group_uuid = None
12757
    self.needed_locks = {}
12758
    if self.op.kind == constants.TAG_NODE:
12759
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12760
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12761
    elif self.op.kind == constants.TAG_INSTANCE:
12762
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12763
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12764
    elif self.op.kind == constants.TAG_NODEGROUP:
12765
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12766

    
12767
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12768
    # not possible to acquire the BGL based on opcode parameters)
12769

    
12770
  def CheckPrereq(self):
12771
    """Check prerequisites.
12772

12773
    """
12774
    if self.op.kind == constants.TAG_CLUSTER:
12775
      self.target = self.cfg.GetClusterInfo()
12776
    elif self.op.kind == constants.TAG_NODE:
12777
      self.target = self.cfg.GetNodeInfo(self.op.name)
12778
    elif self.op.kind == constants.TAG_INSTANCE:
12779
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12780
    elif self.op.kind == constants.TAG_NODEGROUP:
12781
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12782
    else:
12783
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12784
                                 str(self.op.kind), errors.ECODE_INVAL)
12785

    
12786

    
12787
class LUTagsGet(TagsLU):
12788
  """Returns the tags of a given object.
12789

12790
  """
12791
  REQ_BGL = False
12792

    
12793
  def ExpandNames(self):
12794
    TagsLU.ExpandNames(self)
12795

    
12796
    # Share locks as this is only a read operation
12797
    self.share_locks = _ShareAll()
12798

    
12799
  def Exec(self, feedback_fn):
12800
    """Returns the tag list.
12801

12802
    """
12803
    return list(self.target.GetTags())
12804

    
12805

    
12806
class LUTagsSearch(NoHooksLU):
12807
  """Searches the tags for a given pattern.
12808

12809
  """
12810
  REQ_BGL = False
12811

    
12812
  def ExpandNames(self):
12813
    self.needed_locks = {}
12814

    
12815
  def CheckPrereq(self):
12816
    """Check prerequisites.
12817

12818
    This checks the pattern passed for validity by compiling it.
12819

12820
    """
12821
    try:
12822
      self.re = re.compile(self.op.pattern)
12823
    except re.error, err:
12824
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12825
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12826

    
12827
  def Exec(self, feedback_fn):
12828
    """Returns the tag list.
12829

12830
    """
12831
    cfg = self.cfg
12832
    tgts = [("/cluster", cfg.GetClusterInfo())]
12833
    ilist = cfg.GetAllInstancesInfo().values()
12834
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12835
    nlist = cfg.GetAllNodesInfo().values()
12836
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12837
    tgts.extend(("/nodegroup/%s" % n.name, n)
12838
                for n in cfg.GetAllNodeGroupsInfo().values())
12839
    results = []
12840
    for path, target in tgts:
12841
      for tag in target.GetTags():
12842
        if self.re.search(tag):
12843
          results.append((path, tag))
12844
    return results
12845

    
12846

    
12847
class LUTagsSet(TagsLU):
12848
  """Sets a tag on a given object.
12849

12850
  """
12851
  REQ_BGL = False
12852

    
12853
  def CheckPrereq(self):
12854
    """Check prerequisites.
12855

12856
    This checks the type and length of the tag name and value.
12857

12858
    """
12859
    TagsLU.CheckPrereq(self)
12860
    for tag in self.op.tags:
12861
      objects.TaggableObject.ValidateTag(tag)
12862

    
12863
  def Exec(self, feedback_fn):
12864
    """Sets the tag.
12865

12866
    """
12867
    try:
12868
      for tag in self.op.tags:
12869
        self.target.AddTag(tag)
12870
    except errors.TagError, err:
12871
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
12872
    self.cfg.Update(self.target, feedback_fn)
12873

    
12874

    
12875
class LUTagsDel(TagsLU):
12876
  """Delete a list of tags from a given object.
12877

12878
  """
12879
  REQ_BGL = False
12880

    
12881
  def CheckPrereq(self):
12882
    """Check prerequisites.
12883

12884
    This checks that we have the given tag.
12885

12886
    """
12887
    TagsLU.CheckPrereq(self)
12888
    for tag in self.op.tags:
12889
      objects.TaggableObject.ValidateTag(tag)
12890
    del_tags = frozenset(self.op.tags)
12891
    cur_tags = self.target.GetTags()
12892

    
12893
    diff_tags = del_tags - cur_tags
12894
    if diff_tags:
12895
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
12896
      raise errors.OpPrereqError("Tag(s) %s not found" %
12897
                                 (utils.CommaJoin(diff_names), ),
12898
                                 errors.ECODE_NOENT)
12899

    
12900
  def Exec(self, feedback_fn):
12901
    """Remove the tag from the object.
12902

12903
    """
12904
    for tag in self.op.tags:
12905
      self.target.RemoveTag(tag)
12906
    self.cfg.Update(self.target, feedback_fn)
12907

    
12908

    
12909
class LUTestDelay(NoHooksLU):
12910
  """Sleep for a specified amount of time.
12911

12912
  This LU sleeps on the master and/or nodes for a specified amount of
12913
  time.
12914

12915
  """
12916
  REQ_BGL = False
12917

    
12918
  def ExpandNames(self):
12919
    """Expand names and set required locks.
12920

12921
    This expands the node list, if any.
12922

12923
    """
12924
    self.needed_locks = {}
12925
    if self.op.on_nodes:
12926
      # _GetWantedNodes can be used here, but is not always appropriate to use
12927
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12928
      # more information.
12929
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12930
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12931

    
12932
  def _TestDelay(self):
12933
    """Do the actual sleep.
12934

12935
    """
12936
    if self.op.on_master:
12937
      if not utils.TestDelay(self.op.duration):
12938
        raise errors.OpExecError("Error during master delay test")
12939
    if self.op.on_nodes:
12940
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12941
      for node, node_result in result.items():
12942
        node_result.Raise("Failure during rpc call to node %s" % node)
12943

    
12944
  def Exec(self, feedback_fn):
12945
    """Execute the test delay opcode, with the wanted repetitions.
12946

12947
    """
12948
    if self.op.repeat == 0:
12949
      self._TestDelay()
12950
    else:
12951
      top_value = self.op.repeat - 1
12952
      for i in range(self.op.repeat):
12953
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12954
        self._TestDelay()
12955

    
12956

    
12957
class LUTestJqueue(NoHooksLU):
12958
  """Utility LU to test some aspects of the job queue.
12959

12960
  """
12961
  REQ_BGL = False
12962

    
12963
  # Must be lower than default timeout for WaitForJobChange to see whether it
12964
  # notices changed jobs
12965
  _CLIENT_CONNECT_TIMEOUT = 20.0
12966
  _CLIENT_CONFIRM_TIMEOUT = 60.0
12967

    
12968
  @classmethod
12969
  def _NotifyUsingSocket(cls, cb, errcls):
12970
    """Opens a Unix socket and waits for another program to connect.
12971

12972
    @type cb: callable
12973
    @param cb: Callback to send socket name to client
12974
    @type errcls: class
12975
    @param errcls: Exception class to use for errors
12976

12977
    """
12978
    # Using a temporary directory as there's no easy way to create temporary
12979
    # sockets without writing a custom loop around tempfile.mktemp and
12980
    # socket.bind
12981
    tmpdir = tempfile.mkdtemp()
12982
    try:
12983
      tmpsock = utils.PathJoin(tmpdir, "sock")
12984

    
12985
      logging.debug("Creating temporary socket at %s", tmpsock)
12986
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12987
      try:
12988
        sock.bind(tmpsock)
12989
        sock.listen(1)
12990

    
12991
        # Send details to client
12992
        cb(tmpsock)
12993

    
12994
        # Wait for client to connect before continuing
12995
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12996
        try:
12997
          (conn, _) = sock.accept()
12998
        except socket.error, err:
12999
          raise errcls("Client didn't connect in time (%s)" % err)
13000
      finally:
13001
        sock.close()
13002
    finally:
13003
      # Remove as soon as client is connected
13004
      shutil.rmtree(tmpdir)
13005

    
13006
    # Wait for client to close
13007
    try:
13008
      try:
13009
        # pylint: disable=E1101
13010
        # Instance of '_socketobject' has no ... member
13011
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13012
        conn.recv(1)
13013
      except socket.error, err:
13014
        raise errcls("Client failed to confirm notification (%s)" % err)
13015
    finally:
13016
      conn.close()
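    # Client-side counterpart (illustrative sketch, not part of this module):
    # a test driver that learned the socket path from the ELOG_JQUEUE_TEST
    # log entry could confirm the notification roughly like this:
    #
    #   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    #   sock.connect(sockname)  # path reported via _SendNotification
    #   sock.send("\0")         # any byte (or a close) ends the recv(1) above
    #   sock.close()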
13017

    
13018
  def _SendNotification(self, test, arg, sockname):
13019
    """Sends a notification to the client.
13020

13021
    @type test: string
13022
    @param test: Test name
13023
    @param arg: Test argument (depends on test)
13024
    @type sockname: string
13025
    @param sockname: Socket path
13026

13027
    """
13028
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13029

    
13030
  def _Notify(self, prereq, test, arg):
13031
    """Notifies the client of a test.
13032

13033
    @type prereq: bool
13034
    @param prereq: Whether this is a prereq-phase test
13035
    @type test: string
13036
    @param test: Test name
13037
    @param arg: Test argument (depends on test)
13038

13039
    """
13040
    if prereq:
13041
      errcls = errors.OpPrereqError
13042
    else:
13043
      errcls = errors.OpExecError
13044

    
13045
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13046
                                                  test, arg),
13047
                                   errcls)
13048

    
13049
  def CheckArguments(self):
13050
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13051
    self.expandnames_calls = 0
13052

    
13053
  def ExpandNames(self):
13054
    checkargs_calls = getattr(self, "checkargs_calls", 0)
13055
    if checkargs_calls < 1:
13056
      raise errors.ProgrammerError("CheckArguments was not called")
13057

    
13058
    self.expandnames_calls += 1
13059

    
13060
    if self.op.notify_waitlock:
13061
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
13062

    
13063
    self.LogInfo("Expanding names")
13064

    
13065
    # Get lock on master node (just to get a lock, not for a particular reason)
13066
    self.needed_locks = {
13067
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13068
      }
13069

    
13070
  def Exec(self, feedback_fn):
13071
    if self.expandnames_calls < 1:
13072
      raise errors.ProgrammerError("ExpandNames was not called")
13073

    
13074
    if self.op.notify_exec:
13075
      self._Notify(False, constants.JQT_EXEC, None)
13076

    
13077
    self.LogInfo("Executing")
13078

    
13079
    if self.op.log_messages:
13080
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13081
      for idx, msg in enumerate(self.op.log_messages):
13082
        self.LogInfo("Sending log message %s", idx + 1)
13083
        feedback_fn(constants.JQT_MSGPREFIX + msg)
13084
        # Report how many test messages have been sent
13085
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13086

    
13087
    if self.op.fail:
13088
      raise errors.OpExecError("Opcode failure was requested")
13089

    
13090
    return True
13091

    
13092

    
13093
class IAllocator(object):
13094
  """IAllocator framework.
13095

13096
  An IAllocator instance has four sets of attributes:
13097
    - cfg that is needed to query the cluster
13098
    - input data (all members of the _KEYS class attribute are required)
13099
    - four buffer attributes (in|out_data|text), that represent the
13100
      input (to the external script) in text and data structure format,
13101
      and the output from it, again in two formats
13102
    - the result variables from the script (success, info, result) for
13103
      easy usage
13104

13105
  """
13106
  # pylint: disable=R0902
13107
  # lots of instance attributes
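  # Typical usage sketch, modelled on callers in this module (e.g.
  # LUGroupEvacuate.Exec); "hail" is only an example allocator name:
  #
  #   ial = IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
  #                    instances=instance_names, target_groups=target_uuids)
  #   ial.Run("hail")
  #   if not ial.success:
  #     raise errors.OpPrereqError("iallocator failure: %s" % ial.info,
  #                                errors.ECODE_NORES)
  #   jobs = _LoadNodeEvacResult(lu, ial.result, early_release, False)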
13108

    
13109
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
13110
    self.cfg = cfg
13111
    self.rpc = rpc_runner
13112
    # init buffer variables
13113
    self.in_text = self.out_text = self.in_data = self.out_data = None
13114
    # init all input fields so that pylint is happy
13115
    self.mode = mode
13116
    self.memory = self.disks = self.disk_template = None
13117
    self.os = self.tags = self.nics = self.vcpus = None
13118
    self.hypervisor = None
13119
    self.relocate_from = None
13120
    self.name = None
13121
    self.instances = None
13122
    self.evac_mode = None
13123
    self.target_groups = []
13124
    # computed fields
13125
    self.required_nodes = None
13126
    # init result fields
13127
    self.success = self.info = self.result = None
13128

    
13129
    try:
13130
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13131
    except KeyError:
13132
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13133
                                   " IAllocator" % self.mode)
13134

    
13135
    keyset = [n for (n, _) in keydata]
13136

    
13137
    for key in kwargs:
13138
      if key not in keyset:
13139
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
13140
                                     " IAllocator" % key)
13141
      setattr(self, key, kwargs[key])
13142

    
13143
    for key in keyset:
13144
      if key not in kwargs:
13145
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13146
                                     " IAllocator" % key)
13147
    self._BuildInputData(compat.partial(fn, self), keydata)
13148

    
13149
  def _ComputeClusterData(self):
13150
    """Compute the generic allocator input data.
13151

13152
    This is the data that is independent of the actual operation.
13153

13154
    """
13155
    cfg = self.cfg
13156
    cluster_info = cfg.GetClusterInfo()
13157
    # cluster data
13158
    data = {
13159
      "version": constants.IALLOCATOR_VERSION,
13160
      "cluster_name": cfg.GetClusterName(),
13161
      "cluster_tags": list(cluster_info.GetTags()),
13162
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13163
      # we don't have job IDs
13164
      }
13165
    ninfo = cfg.GetAllNodesInfo()
13166
    iinfo = cfg.GetAllInstancesInfo().values()
13167
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13168

    
13169
    # node data
13170
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
13171

    
13172
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13173
      hypervisor_name = self.hypervisor
13174
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13175
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13176
    else:
13177
      hypervisor_name = cluster_info.enabled_hypervisors[0]
13178

    
13179
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13180
                                        hypervisor_name)
13181
    node_iinfo = \
13182
      self.rpc.call_all_instances_info(node_list,
13183
                                       cluster_info.enabled_hypervisors)
13184

    
13185
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13186

    
13187
    config_ndata = self._ComputeBasicNodeData(ninfo)
13188
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13189
                                                 i_list, config_ndata)
13190
    assert len(data["nodes"]) == len(ninfo), \
13191
        "Incomplete node data computed"
13192

    
13193
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13194

    
13195
    self.in_data = data
13196

    
13197
  @staticmethod
13198
  def _ComputeNodeGroupData(cfg):
13199
    """Compute node groups data.
13200

13201
    """
13202
    ng = dict((guuid, {
13203
      "name": gdata.name,
13204
      "alloc_policy": gdata.alloc_policy,
13205
      })
13206
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13207

    
13208
    return ng
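    # Result shape sketch (UUID, name and policy below are just examples):
    #   {"some-group-uuid": {"name": "default", "alloc_policy": "preferred"}}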
13209

    
13210
  @staticmethod
13211
  def _ComputeBasicNodeData(node_cfg):
13212
    """Compute global node data.
13213

13214
    @rtype: dict
13215
    @returns: a dict mapping node name to a dict of config-derived attributes
13216

13217
    """
13218
    # fill in static (config-based) values
13219
    node_results = dict((ninfo.name, {
13220
      "tags": list(ninfo.GetTags()),
13221
      "primary_ip": ninfo.primary_ip,
13222
      "secondary_ip": ninfo.secondary_ip,
13223
      "offline": ninfo.offline,
13224
      "drained": ninfo.drained,
13225
      "master_candidate": ninfo.master_candidate,
13226
      "group": ninfo.group,
13227
      "master_capable": ninfo.master_capable,
13228
      "vm_capable": ninfo.vm_capable,
13229
      })
13230
      for ninfo in node_cfg.values())
13231

    
13232
    return node_results
13233

    
13234
  @staticmethod
13235
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13236
                              node_results):
13237
    """Compute global node data.
13238

13239
    @param node_results: the basic node structures as filled from the config
13240

13241
    """
13242
    # make a copy of the current dict
13243
    node_results = dict(node_results)
13244
    for nname, nresult in node_data.items():
13245
      assert nname in node_results, "Missing basic data for node %s" % nname
13246
      ninfo = node_cfg[nname]
13247

    
13248
      if not (ninfo.offline or ninfo.drained):
13249
        nresult.Raise("Can't get data for node %s" % nname)
13250
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13251
                                nname)
13252
        remote_info = nresult.payload
13253

    
13254
        for attr in ["memory_total", "memory_free", "memory_dom0",
13255
                     "vg_size", "vg_free", "cpu_total"]:
13256
          if attr not in remote_info:
13257
            raise errors.OpExecError("Node '%s' didn't return attribute"
13258
                                     " '%s'" % (nname, attr))
13259
          if not isinstance(remote_info[attr], int):
13260
            raise errors.OpExecError("Node '%s' returned invalid value"
13261
                                     " for '%s': %s" %
13262
                                     (nname, attr, remote_info[attr]))
13263
        # compute memory used by primary instances
13264
        i_p_mem = i_p_up_mem = 0
13265
        for iinfo, beinfo in i_list:
13266
          if iinfo.primary_node == nname:
13267
            i_p_mem += beinfo[constants.BE_MEMORY]
13268
            if iinfo.name not in node_iinfo[nname].payload:
13269
              i_used_mem = 0
13270
            else:
13271
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13272
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13273
            remote_info["memory_free"] -= max(0, i_mem_diff)
13274

    
13275
            if iinfo.admin_up:
13276
              i_p_up_mem += beinfo[constants.BE_MEMORY]
13277

    
13278
        # compute memory used by instances
13279
        pnr_dyn = {
13280
          "total_memory": remote_info["memory_total"],
13281
          "reserved_memory": remote_info["memory_dom0"],
13282
          "free_memory": remote_info["memory_free"],
13283
          "total_disk": remote_info["vg_size"],
13284
          "free_disk": remote_info["vg_free"],
13285
          "total_cpus": remote_info["cpu_total"],
13286
          "i_pri_memory": i_p_mem,
13287
          "i_pri_up_memory": i_p_up_mem,
13288
          }
13289
        pnr_dyn.update(node_results[nname])
13290
        node_results[nname] = pnr_dyn
13291

    
13292
    return node_results
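    # Per-node result sketch for an online node: the dynamic keys computed
    # above merged with the static ones from _ComputeBasicNodeData (all
    # numbers are made up):
    #   {"total_memory": 16384, "reserved_memory": 512, "free_memory": 8192,
    #    "total_disk": 409600, "free_disk": 204800, "total_cpus": 8,
    #    "i_pri_memory": 4096, "i_pri_up_memory": 2048,
    #    "tags": [], "primary_ip": "192.0.2.10", "offline": False, ...}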
13293

    
13294
  @staticmethod
13295
  def _ComputeInstanceData(cluster_info, i_list):
13296
    """Compute global instance data.
13297

13298
    """
13299
    instance_data = {}
13300
    for iinfo, beinfo in i_list:
13301
      nic_data = []
13302
      for nic in iinfo.nics:
13303
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13304
        nic_dict = {
13305
          "mac": nic.mac,
13306
          "ip": nic.ip,
13307
          "mode": filled_params[constants.NIC_MODE],
13308
          "link": filled_params[constants.NIC_LINK],
13309
          }
13310
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13311
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13312
        nic_data.append(nic_dict)
13313
      pir = {
13314
        "tags": list(iinfo.GetTags()),
13315
        "admin_up": iinfo.admin_up,
13316
        "vcpus": beinfo[constants.BE_VCPUS],
13317
        "memory": beinfo[constants.BE_MEMORY],
13318
        "os": iinfo.os,
13319
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13320
        "nics": nic_data,
13321
        "disks": [{constants.IDISK_SIZE: dsk.size,
13322
                   constants.IDISK_MODE: dsk.mode}
13323
                  for dsk in iinfo.disks],
13324
        "disk_template": iinfo.disk_template,
13325
        "hypervisor": iinfo.hypervisor,
13326
        }
13327
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13328
                                                 pir["disks"])
13329
      instance_data[iinfo.name] = pir
13330

    
13331
    return instance_data
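    # Per-instance result sketch (all values are made-up placeholders):
    #   {"tags": [], "admin_up": True, "vcpus": 2, "memory": 1024,
    #    "os": "debootstrap+default", "nodes": ["node1", "node2"],
    #    "nics": [{"mac": "aa:00:00:12:34:56", "ip": None, "mode": "bridged",
    #              "link": "xen-br0", "bridge": "xen-br0"}],
    #    "disks": [{"size": 10240, "mode": "rw"}],
    #    "disk_template": "drbd", "hypervisor": "xen-pvm",
    #    "disk_space_total": 20864}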
13332

    
13333
  def _AddNewInstance(self):
13334
    """Add new instance data to allocator structure.
13335

13336
    This, in combination with _ComputeClusterData, will create the
13337
    correct structure needed as input for the allocator.
13338

13339
    The checks for the completeness of the opcode must have already been
13340
    done.
13341

13342
    """
13343
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13344

    
13345
    if self.disk_template in constants.DTS_INT_MIRROR:
13346
      self.required_nodes = 2
13347
    else:
13348
      self.required_nodes = 1
13349

    
13350
    request = {
13351
      "name": self.name,
13352
      "disk_template": self.disk_template,
13353
      "tags": self.tags,
13354
      "os": self.os,
13355
      "vcpus": self.vcpus,
13356
      "memory": self.memory,
13357
      "disks": self.disks,
13358
      "disk_space_total": disk_space,
13359
      "nics": self.nics,
13360
      "required_nodes": self.required_nodes,
13361
      "hypervisor": self.hypervisor,
13362
      }
13363

    
13364
    return request
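    # Request sketch (subset of keys, all values made up for illustration):
    #   {"name": "inst1.example.com", "disk_template": "drbd", "memory": 512,
    #    "vcpus": 1, "disks": [{"size": 1024, "mode": "rw"}],
    #    "required_nodes": 2, "hypervisor": "xen-pvm", ...}
    # _BuildInputData later adds the "type" key and nests this under "request".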
13365

    
13366
  def _AddRelocateInstance(self):
13367
    """Add relocate instance data to allocator structure.
13368

13369
    This, in combination with _ComputeClusterData, will create the
13370
    correct structure needed as input for the allocator.
13371

13372
    The checks for the completeness of the opcode must have already been
13373
    done.
13374

13375
    """
13376
    instance = self.cfg.GetInstanceInfo(self.name)
13377
    if instance is None:
13378
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13379
                                   " IAllocator" % self.name)
13380

    
13381
    if instance.disk_template not in constants.DTS_MIRRORED:
13382
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13383
                                 errors.ECODE_INVAL)
13384

    
13385
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13386
        len(instance.secondary_nodes) != 1:
13387
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13388
                                 errors.ECODE_STATE)
13389

    
13390
    self.required_nodes = 1
13391
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13392
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13393

    
13394
    request = {
13395
      "name": self.name,
13396
      "disk_space_total": disk_space,
13397
      "required_nodes": self.required_nodes,
13398
      "relocate_from": self.relocate_from,
13399
      }
13400
    return request
13401

    
13402
  def _AddNodeEvacuate(self):
13403
    """Get data for node-evacuate requests.
13404

13405
    """
13406
    return {
13407
      "instances": self.instances,
13408
      "evac_mode": self.evac_mode,
13409
      }
13410

    
13411
  def _AddChangeGroup(self):
13412
    """Get data for node-evacuate requests.
13413

13414
    """
13415
    return {
13416
      "instances": self.instances,
13417
      "target_groups": self.target_groups,
13418
      }
13419

    
13420
  def _BuildInputData(self, fn, keydata):
13421
    """Build input data structures.
13422

13423
    """
13424
    self._ComputeClusterData()
13425

    
13426
    request = fn()
13427
    request["type"] = self.mode
13428
    for keyname, keytype in keydata:
13429
      if keyname not in request:
13430
        raise errors.ProgrammerError("Request parameter %s is missing" %
13431
                                     keyname)
13432
      val = request[keyname]
13433
      if not keytype(val):
13434
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13435
                                     " validation, value %s, expected"
13436
                                     " type %s" % (keyname, val, keytype))
13437
    self.in_data["request"] = request
13438

    
13439
    self.in_text = serializer.Dump(self.in_data)
13440

    
13441
  _STRING_LIST = ht.TListOf(ht.TString)
13442
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13443
     # pylint: disable=E1101
13444
     # Class '...' has no 'OP_ID' member
13445
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13446
                          opcodes.OpInstanceMigrate.OP_ID,
13447
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13448
     })))
13449

    
13450
  _NEVAC_MOVED = \
13451
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13452
                       ht.TItems([ht.TNonEmptyString,
13453
                                  ht.TNonEmptyString,
13454
                                  ht.TListOf(ht.TNonEmptyString),
13455
                                 ])))
13456
  _NEVAC_FAILED = \
13457
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13458
                       ht.TItems([ht.TNonEmptyString,
13459
                                  ht.TMaybeString,
13460
                                 ])))
13461
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13462
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
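  # Well-formed result sketch for the node-evacuate/change-group modes (all
  # names are made-up; "moved" entries are roughly (instance, group, nodes)):
  #   ([("inst1", "group2", ["node3"])],
  #    [("inst2", "disk is degraded")],
  #    [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]])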
13463

    
13464
  _MODE_DATA = {
13465
    constants.IALLOCATOR_MODE_ALLOC:
13466
      (_AddNewInstance,
13467
       [
13468
        ("name", ht.TString),
13469
        ("memory", ht.TInt),
13470
        ("disks", ht.TListOf(ht.TDict)),
13471
        ("disk_template", ht.TString),
13472
        ("os", ht.TString),
13473
        ("tags", _STRING_LIST),
13474
        ("nics", ht.TListOf(ht.TDict)),
13475
        ("vcpus", ht.TInt),
13476
        ("hypervisor", ht.TString),
13477
        ], ht.TList),
13478
    constants.IALLOCATOR_MODE_RELOC:
13479
      (_AddRelocateInstance,
13480
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13481
       ht.TList),
13482
     constants.IALLOCATOR_MODE_NODE_EVAC:
13483
      (_AddNodeEvacuate, [
13484
        ("instances", _STRING_LIST),
13485
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13486
        ], _NEVAC_RESULT),
13487
     constants.IALLOCATOR_MODE_CHG_GROUP:
13488
      (_AddChangeGroup, [
13489
        ("instances", _STRING_LIST),
13490
        ("target_groups", _STRING_LIST),
13491
        ], _NEVAC_RESULT),
13492
    }
13493

    
13494
  def Run(self, name, validate=True, call_fn=None):
13495
    """Run an instance allocator and return the results.
13496

13497
    """
13498
    if call_fn is None:
13499
      call_fn = self.rpc.call_iallocator_runner
13500

    
13501
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13502
    result.Raise("Failure while running the iallocator script")
13503

    
13504
    self.out_text = result.payload
13505
    if validate:
13506
      self._ValidateResult()
13507

    
13508
  def _ValidateResult(self):
13509
    """Process the allocator results.
13510

13511
    This will process and if successful save the result in
13512
    self.out_data and the other parameters.
13513

13514
    """
13515
    try:
13516
      rdict = serializer.Load(self.out_text)
13517
    except Exception, err:
13518
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13519

    
13520
    if not isinstance(rdict, dict):
13521
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
13522

    
13523
    # TODO: remove backwards compatibility in later versions
13524
    if "nodes" in rdict and "result" not in rdict:
13525
      rdict["result"] = rdict["nodes"]
13526
      del rdict["nodes"]
13527

    
13528
    for key in "success", "info", "result":
13529
      if key not in rdict:
13530
        raise errors.OpExecError("Can't parse iallocator results:"
13531
                                 " missing key '%s'" % key)
13532
      setattr(self, key, rdict[key])
13533

    
13534
    if not self._result_check(self.result):
13535
      raise errors.OpExecError("Iallocator returned invalid result,"
13536
                               " expected %s, got %s" %
13537
                               (self._result_check, self.result),
13538
                               errors.ECODE_INVAL)
13539

    
13540
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
13541
      assert self.relocate_from is not None
13542
      assert self.required_nodes == 1
13543

    
13544
      node2group = dict((name, ndata["group"])
13545
                        for (name, ndata) in self.in_data["nodes"].items())
13546

    
13547
      fn = compat.partial(self._NodesToGroups, node2group,
13548
                          self.in_data["nodegroups"])
13549

    
13550
      instance = self.cfg.GetInstanceInfo(self.name)
13551
      request_groups = fn(self.relocate_from + [instance.primary_node])
13552
      result_groups = fn(rdict["result"] + [instance.primary_node])
13553

    
13554
      if self.success and not set(result_groups).issubset(request_groups):
13555
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13556
                                 " differ from original groups (%s)" %
13557
                                 (utils.CommaJoin(result_groups),
13558
                                  utils.CommaJoin(request_groups)))
13559

    
13560
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13561
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13562

    
13563
    self.out_data = rdict
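
  # Illustrative reply (values invented): _ValidateResult above accepts any
  # JSON object that provides at least the three mandatory keys, e.g.
  #
  #   {"success": true, "info": "allocation successful", "result": ["node2"]}
  #
  # Replies from older scripts that still return "nodes" instead of "result"
  # are rewritten by the backwards-compatibility branch before the key check.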

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
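
  # Worked example (assumed data, not taken from the original source): given
  #
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b", "node3": "uuid-a"}
  #   groups = {"uuid-a": {"name": "default"}}      # "uuid-b" is not listed
  #
  # _NodesToGroups(node2group, groups, ["node1", "node2", "bogus"]) returns
  # ["default", "uuid-b"]: the unknown node "bogus" is skipped and the
  # missing group falls back to its UUID.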


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
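
  # Behaviour sketch (restating what Exec above already does; the opcode
  # field names are the ones referenced via self.op, the corresponding opcode
  # class is assumed to be opcodes.OpTestAllocator, and the allocator name is
  # just an example): with direction IALLOCATOR_DIR_IN the LU only builds and
  # returns the generated request text (ial.in_text) without running any
  # script; with IALLOCATOR_DIR_OUT it runs the named allocator and returns
  # its raw, unvalidated reply (ial.out_text), e.g.
  #
  #   op = opcodes.OpTestAllocator(mode=constants.IALLOCATOR_MODE_RELOC,
  #                                direction=constants.IALLOCATOR_DIR_OUT,
  #                                allocator="hail",
  #                                name="inst1.example.com")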


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
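
# Usage sketch (illustrative, not part of the original file): code handling a
# query opcode resolves its backend by resource name, for example
#
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#
# and an unknown resource name raises OpPrereqError with ECODE_INVAL, as
# implemented above.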