1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62
from ganeti import rpc
63

    
64
import ganeti.masterd.instance # pylint: disable=W0611
65

    
66

    
67
#: Size of DRBD meta block device
68
DRBD_META_SIZE = 128
69

    
70

    
71
class ResultWithJobs:
72
  """Data container for LU results with jobs.
73

74
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
75
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
76
  contained in the C{jobs} attribute and include the job IDs in the opcode
77
  result.
78

79
  """
80
  def __init__(self, jobs, **kwargs):
81
    """Initializes this class.
82

83
    Additional return values can be specified as keyword arguments.
84

85
    @type jobs: list of lists of L{opcode.OpCode}
86
    @param jobs: A list of lists of opcode objects
87

88
    """
89
    self.jobs = jobs
90
    self.other = kwargs
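  # A minimal usage sketch (hypothetical Exec body; the "checked_groups"
  # keyword is an assumed extra return value, not an existing one): one
  # follow-up job is submitted per node group.
  #
  #   def Exec(self, feedback_fn):
  #     jobs = [[opcodes.OpClusterVerifyGroup(group_name=group)]
  #             for group in self.cfg.GetNodeGroupList()]
  #     return ResultWithJobs(jobs, checked_groups=len(jobs))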
91

    
92

    
93
class LogicalUnit(object):
94
  """Logical Unit base class.
95

96
  Subclasses must follow these rules:
97
    - implement ExpandNames
98
    - implement CheckPrereq (except when tasklets are used)
99
    - implement Exec (except when tasklets are used)
100
    - implement BuildHooksEnv
101
    - implement BuildHooksNodes
102
    - redefine HPATH and HTYPE
103
    - optionally redefine their run requirements:
104
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
105

106
  Note that all commands require root permissions.
107

108
  @ivar dry_run_result: the value (if any) that will be returned to the caller
109
      in dry-run mode (signalled by opcode dry_run parameter)
110

111
  """
112
  HPATH = None
113
  HTYPE = None
114
  REQ_BGL = True
115

    
116
  def __init__(self, processor, op, context, rpc_runner):
117
    """Constructor for LogicalUnit.
118

119
    This needs to be overridden in derived classes in order to check op
120
    validity.
121

122
    """
123
    self.proc = processor
124
    self.op = op
125
    self.cfg = context.cfg
126
    self.glm = context.glm
127
    # readability alias
128
    self.owned_locks = context.glm.list_owned
129
    self.context = context
130
    self.rpc = rpc_runner
131
    # Dicts used to declare locking needs to mcpu
132
    self.needed_locks = None
133
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
134
    self.add_locks = {}
135
    self.remove_locks = {}
136
    # Used to force good behavior when calling helper functions
137
    self.recalculate_locks = {}
138
    # logging
139
    self.Log = processor.Log # pylint: disable=C0103
140
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
141
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
142
    self.LogStep = processor.LogStep # pylint: disable=C0103
143
    # support for dry-run
144
    self.dry_run_result = None
145
    # support for generic debug attribute
146
    if (not hasattr(self.op, "debug_level") or
147
        not isinstance(self.op.debug_level, int)):
148
      self.op.debug_level = 0
149

    
150
    # Tasklets
151
    self.tasklets = None
152

    
153
    # Validate opcode parameters and set defaults
154
    self.op.Validate(True)
155

    
156
    self.CheckArguments()
157

    
158
  def CheckArguments(self):
159
    """Check syntactic validity for the opcode arguments.
160

161
    This method is for doing a simple syntactic check and ensuring the
162
    validity of opcode parameters, without any cluster-related
163
    checks. While the same can be accomplished in ExpandNames and/or
164
    CheckPrereq, doing these separately is better because:
165

166
      - ExpandNames is left as purely a lock-related function
167
      - CheckPrereq is run after we have acquired locks (and possibly
168
        waited for them)
169

170
    The function is allowed to change the self.op attribute so that
171
    later methods no longer need to worry about missing parameters.
172

173
    """
174
    pass
175

    
176
  def ExpandNames(self):
177
    """Expand names for this LU.
178

179
    This method is called before starting to execute the opcode, and it should
180
    update all the parameters of the opcode to their canonical form (e.g. a
181
    short node name must be fully expanded after this method has successfully
182
    completed). This way locking, hooks, logging, etc. can work correctly.
183

184
    LUs which implement this method must also populate the self.needed_locks
185
    member, as a dict with lock levels as keys, and a list of needed lock names
186
    as values. Rules:
187

188
      - use an empty dict if you don't need any lock
189
      - if you don't need any lock at a particular level omit that level
190
      - don't put anything for the BGL level
191
      - if you want all locks at a level use locking.ALL_SET as a value
192

193
    If you need to share locks (rather than acquire them exclusively) at one
194
    level you can modify self.share_locks, setting a true value (usually 1) for
195
    that level. By default locks are not shared.
196

197
    This function can also define a list of tasklets, which then will be
198
    executed in order instead of the usual LU-level CheckPrereq and Exec
199
    functions, if those are not defined by the LU.
200

201
    Examples::
202

203
      # Acquire all nodes and one instance
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: locking.ALL_SET,
206
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
207
      }
208
      # Acquire just two nodes
209
      self.needed_locks = {
210
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
211
      }
212
      # Acquire no locks
213
      self.needed_locks = {} # No, you can't leave it to the default value None
214

215
    """
216
    # The implementation of this method is mandatory only if the new LU is
217
    # concurrent, so that old LUs don't need to be changed all at the same
218
    # time.
219
    if self.REQ_BGL:
220
      self.needed_locks = {} # Exclusive LUs don't need locks.
221
    else:
222
      raise NotImplementedError
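  # A minimal sketch of ExpandNames for a concurrent LU (assumed to need
  # only shared node locks); REQ_BGL would be redefined to False in the
  # subclass:
  #
  #   REQ_BGL = False
  #
  #   def ExpandNames(self):
  #     self.share_locks = _ShareAll()
  #     self.needed_locks = {
  #       locking.LEVEL_NODE: locking.ALL_SET,
  #       }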
223

    
224
  def DeclareLocks(self, level):
225
    """Declare LU locking needs for a level
226

227
    While most LUs can just declare their locking needs at ExpandNames time,
228
    sometimes there's the need to calculate some locks after having acquired
229
    the ones before. This function is called just before acquiring locks at a
230
    particular level, but after acquiring the ones at lower levels, and permits
231
    such calculations. It can be used to modify self.needed_locks, and by
232
    default it does nothing.
233

234
    This function is only called if you have something already set in
235
    self.needed_locks for the level.
236

237
    @param level: Locking level which is going to be locked
238
    @type level: member of ganeti.locking.LEVELS
239

240
    """
241

    
242
  def CheckPrereq(self):
243
    """Check prerequisites for this LU.
244

245
    This method should check that the prerequisites for the execution
246
    of this LU are fulfilled. It can do internode communication, but
247
    it should be idempotent - no cluster or system changes are
248
    allowed.
249

250
    The method should raise errors.OpPrereqError in case something is
251
    not fulfilled. Its return value is ignored.
252

253
    This method should also update all the parameters of the opcode to
254
    their canonical form if it hasn't been done by ExpandNames before.
255

256
    """
257
    if self.tasklets is not None:
258
      for (idx, tl) in enumerate(self.tasklets):
259
        logging.debug("Checking prerequisites for tasklet %s/%s",
260
                      idx + 1, len(self.tasklets))
261
        tl.CheckPrereq()
262
    else:
263
      pass
264

    
265
  def Exec(self, feedback_fn):
266
    """Execute the LU.
267

268
    This method should implement the actual work. It should raise
269
    errors.OpExecError for failures that are somewhat dealt with in
270
    code, or expected.
271

272
    """
273
    if self.tasklets is not None:
274
      for (idx, tl) in enumerate(self.tasklets):
275
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
276
        tl.Exec(feedback_fn)
277
    else:
278
      raise NotImplementedError
279

    
280
  def BuildHooksEnv(self):
281
    """Build hooks environment for this LU.
282

283
    @rtype: dict
284
    @return: Dictionary containing the environment that will be used for
285
      running the hooks for this LU. The keys of the dict must not be prefixed
286
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
287
      will extend the environment with additional variables. If no environment
288
      should be defined, an empty dictionary should be returned (not C{None}).
289
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
290
      will not be called.
291

292
    """
293
    raise NotImplementedError
294

    
295
  def BuildHooksNodes(self):
296
    """Build list of nodes to run LU's hooks.
297

298
    @rtype: tuple; (list, list)
299
    @return: Tuple containing a list of node names on which the hook
300
      should run before the execution and a list of node names on which the
301
      hook should run after the execution. No nodes should be returned as an
302
      empty list (and not None).
303
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
304
      will not be called.
305

306
    """
307
    raise NotImplementedError
308

    
309
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
310
    """Notify the LU about the results of its hooks.
311

312
    This method is called every time a hooks phase is executed, and notifies
313
    the Logical Unit about the hooks' result. The LU can then use it to alter
314
    its result based on the hooks.  By default the method does nothing and the
315
    previous result is passed back unchanged, but any LU can define it if it
316
    wants to use the local cluster hook-scripts somehow.
317

318
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
319
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
320
    @param hook_results: the results of the multi-node hooks rpc call
321
    @param feedback_fn: function used to send feedback back to the caller
322
    @param lu_result: the previous Exec result this LU had, or None
323
        in the PRE phase
324
    @return: the new Exec result, based on the previous result
325
        and hook results
326

327
    """
328
    # API must be kept, thus we ignore the unused-argument and
329
    # could-be-a-function warnings
330
    # pylint: disable=W0613,R0201
331
    return lu_result
332

    
333
  def _ExpandAndLockInstance(self):
334
    """Helper function to expand and lock an instance.
335

336
    Many LUs that work on an instance take its name in self.op.instance_name
337
    and need to expand it and then declare the expanded name for locking. This
338
    function does it, and then updates self.op.instance_name to the expanded
339
    name. It also initializes needed_locks as a dict, if this hasn't been done
340
    before.
341

342
    """
343
    if self.needed_locks is None:
344
      self.needed_locks = {}
345
    else:
346
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
347
        "_ExpandAndLockInstance called with instance-level locks set"
348
    self.op.instance_name = _ExpandInstanceName(self.cfg,
349
                                                self.op.instance_name)
350
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
351

    
352
  def _LockInstancesNodes(self, primary_only=False,
353
                          level=locking.LEVEL_NODE):
354
    """Helper function to declare instances' nodes for locking.
355

356
    This function should be called after locking one or more instances to lock
357
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
358
    with all primary or secondary nodes for instances already locked and
359
    present in self.needed_locks[locking.LEVEL_INSTANCE].
360

361
    It should be called from DeclareLocks, and for safety only works if
362
    self.recalculate_locks[locking.LEVEL_NODE] is set.
363

364
    In the future it may grow parameters to just lock some instance's nodes, or
365
    to just lock primary or secondary nodes, if needed.
366

367
    It should be called in DeclareLocks in a way similar to::
368

369
      if level == locking.LEVEL_NODE:
370
        self._LockInstancesNodes()
371

372
    @type primary_only: boolean
373
    @param primary_only: only lock primary nodes of locked instances
374
    @param level: Which lock level to use for locking nodes
375

376
    """
377
    assert level in self.recalculate_locks, \
378
      "_LockInstancesNodes helper function called with no nodes to recalculate"
379

    
380
    # TODO: check if we've really been called with the instance locks held
381

    
382
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
383
    # future we might want to have different behaviors depending on the value
384
    # of self.recalculate_locks[locking.LEVEL_NODE]
385
    wanted_nodes = []
386
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
387
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
388
      wanted_nodes.append(instance.primary_node)
389
      if not primary_only:
390
        wanted_nodes.extend(instance.secondary_nodes)
391

    
392
    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
393
      self.needed_locks[level] = wanted_nodes
394
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
395
      self.needed_locks[level].extend(wanted_nodes)
396
    else:
397
      raise errors.ProgrammerError("Unknown recalculation mode")
398

    
399
    del self.recalculate_locks[level]
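  # A minimal sketch of the intended calling pattern for a hypothetical LU
  # operating on a single instance: mark the node level for recalculation
  # in ExpandNames, then resolve it here from DeclareLocks.
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes(primary_only=True)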
400

    
401

    
402
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
403
  """Simple LU which runs no hooks.
404

405
  This LU is intended as a parent for other LogicalUnits which will
406
  run no hooks, in order to reduce duplicate code.
407

408
  """
409
  HPATH = None
410
  HTYPE = None
411

    
412
  def BuildHooksEnv(self):
413
    """Empty BuildHooksEnv for NoHooksLU.
414

415
    This just raises an error.
416

417
    """
418
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
419

    
420
  def BuildHooksNodes(self):
421
    """Empty BuildHooksNodes for NoHooksLU.
422

423
    """
424
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
425

    
426

    
427
class Tasklet:
428
  """Tasklet base class.
429

430
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
431
  they can mix legacy code with tasklets. Locking needs to be done in the LU;
432
  tasklets know nothing about locks.
433

434
  Subclasses must follow these rules:
435
    - Implement CheckPrereq
436
    - Implement Exec
437

438
  """
439
  def __init__(self, lu):
440
    self.lu = lu
441

    
442
    # Shortcuts
443
    self.cfg = lu.cfg
444
    self.rpc = lu.rpc
445

    
446
  def CheckPrereq(self):
447
    """Check prerequisites for this tasklet.
448

449
    This method should check whether the prerequisites for the execution of
450
    this tasklet are fulfilled. It can do internode communication, but it
451
    should be idempotent - no cluster or system changes are allowed.
452

453
    The method should raise errors.OpPrereqError in case something is not
454
    fulfilled. Its return value is ignored.
455

456
    This method should also update all parameters to their canonical form if it
457
    hasn't been done before.
458

459
    """
460
    pass
461

    
462
  def Exec(self, feedback_fn):
463
    """Execute the tasklet.
464

465
    This method should implement the actual work. It should raise
466
    errors.OpExecError for failures that are somewhat dealt with in code, or
467
    expected.
468

469
    """
470
    raise NotImplementedError
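  # A minimal sketch of how an LU delegates its work to tasklets
  # (SomeTasklet is a hypothetical Tasklet subclass): once self.tasklets is
  # set in ExpandNames, LogicalUnit.CheckPrereq and LogicalUnit.Exec run
  # the tasklets instead of requiring LU-level overrides.
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.tasklets = [SomeTasklet(self, self.op.instance_name)]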
471

    
472

    
473
class _QueryBase:
474
  """Base for query utility classes.
475

476
  """
477
  #: Attribute holding field definitions
478
  FIELDS = None
479

    
480
  def __init__(self, qfilter, fields, use_locking):
481
    """Initializes this class.
482

483
    """
484
    self.use_locking = use_locking
485

    
486
    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
487
                             namefield="name")
488
    self.requested_data = self.query.RequestedData()
489
    self.names = self.query.RequestedNames()
490

    
491
    # Sort only if no names were requested
492
    self.sort_by_name = not self.names
493

    
494
    self.do_locking = None
495
    self.wanted = None
496

    
497
  def _GetNames(self, lu, all_names, lock_level):
498
    """Helper function to determine names asked for in the query.
499

500
    """
501
    if self.do_locking:
502
      names = lu.owned_locks(lock_level)
503
    else:
504
      names = all_names
505

    
506
    if self.wanted == locking.ALL_SET:
507
      assert not self.names
508
      # caller didn't specify names, so ordering is not important
509
      return utils.NiceSort(names)
510

    
511
    # caller specified names and we must keep the same order
512
    assert self.names
513
    assert not self.do_locking or lu.glm.is_owned(lock_level)
514

    
515
    missing = set(self.wanted).difference(names)
516
    if missing:
517
      raise errors.OpExecError("Some items were removed before retrieving"
518
                               " their data: %s" % missing)
519

    
520
    # Return expanded names
521
    return self.wanted
522

    
523
  def ExpandNames(self, lu):
524
    """Expand names for this query.
525

526
    See L{LogicalUnit.ExpandNames}.
527

528
    """
529
    raise NotImplementedError()
530

    
531
  def DeclareLocks(self, lu, level):
532
    """Declare locks for this query.
533

534
    See L{LogicalUnit.DeclareLocks}.
535

536
    """
537
    raise NotImplementedError()
538

    
539
  def _GetQueryData(self, lu):
540
    """Collects all data for this query.
541

542
    @return: Query data object
543

544
    """
545
    raise NotImplementedError()
546

    
547
  def NewStyleQuery(self, lu):
548
    """Collect data and execute query.
549

550
    """
551
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
552
                                  sort_by_name=self.sort_by_name)
553

    
554
  def OldStyleQuery(self, lu):
555
    """Collect data and execute query.
556

557
    """
558
    return self.query.OldStyleQuery(self._GetQueryData(lu),
559
                                    sort_by_name=self.sort_by_name)
560

    
561

    
562
def _ShareAll():
563
  """Returns a dict declaring all lock levels shared.
564

565
  """
566
  return dict.fromkeys(locking.LEVELS, 1)
567

    
568

    
569
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
570
  """Checks if the owned node groups are still correct for an instance.
571

572
  @type cfg: L{config.ConfigWriter}
573
  @param cfg: The cluster configuration
574
  @type instance_name: string
575
  @param instance_name: Instance name
576
  @type owned_groups: set or frozenset
577
  @param owned_groups: List of currently owned node groups
578

579
  """
580
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
581

    
582
  if not owned_groups.issuperset(inst_groups):
583
    raise errors.OpPrereqError("Instance %s's node groups changed since"
584
                               " locks were acquired, current groups are"
585
                               " '%s', owning groups '%s'; retry the"
586
                               " operation" %
587
                               (instance_name,
588
                                utils.CommaJoin(inst_groups),
589
                                utils.CommaJoin(owned_groups)),
590
                               errors.ECODE_STATE)
591

    
592
  return inst_groups
593

    
594

    
595
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
596
  """Checks if the instances in a node group are still correct.
597

598
  @type cfg: L{config.ConfigWriter}
599
  @param cfg: The cluster configuration
600
  @type group_uuid: string
601
  @param group_uuid: Node group UUID
602
  @type owned_instances: set or frozenset
603
  @param owned_instances: List of currently owned instances
604

605
  """
606
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
607
  if owned_instances != wanted_instances:
608
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
609
                               " locks were acquired, wanted '%s', have '%s';"
610
                               " retry the operation" %
611
                               (group_uuid,
612
                                utils.CommaJoin(wanted_instances),
613
                                utils.CommaJoin(owned_instances)),
614
                               errors.ECODE_STATE)
615

    
616
  return wanted_instances
617

    
618

    
619
def _SupportsOob(cfg, node):
620
  """Tells if node supports OOB.
621

622
  @type cfg: L{config.ConfigWriter}
623
  @param cfg: The cluster configuration
624
  @type node: L{objects.Node}
625
  @param node: The node
626
  @return: The OOB script if supported or an empty string otherwise
627

628
  """
629
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
630

    
631

    
632
def _GetWantedNodes(lu, nodes):
633
  """Returns list of checked and expanded node names.
634

635
  @type lu: L{LogicalUnit}
636
  @param lu: the logical unit on whose behalf we execute
637
  @type nodes: list
638
  @param nodes: list of node names or None for all nodes
639
  @rtype: list
640
  @return: the list of nodes, sorted
641
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
642

643
  """
644
  if nodes:
645
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
646

    
647
  return utils.NiceSort(lu.cfg.GetNodeList())
648

    
649

    
650
def _GetWantedInstances(lu, instances):
651
  """Returns list of checked and expanded instance names.
652

653
  @type lu: L{LogicalUnit}
654
  @param lu: the logical unit on whose behalf we execute
655
  @type instances: list
656
  @param instances: list of instance names or None for all instances
657
  @rtype: list
658
  @return: the list of instances, sorted
659
  @raise errors.OpPrereqError: if the instances parameter is wrong type
660
  @raise errors.OpPrereqError: if any of the passed instances is not found
661

662
  """
663
  if instances:
664
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
665
  else:
666
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
667
  return wanted
668

    
669

    
670
def _GetUpdatedParams(old_params, update_dict,
671
                      use_default=True, use_none=False):
672
  """Return the new version of a parameter dictionary.
673

674
  @type old_params: dict
675
  @param old_params: old parameters
676
  @type update_dict: dict
677
  @param update_dict: dict containing new parameter values, or
678
      constants.VALUE_DEFAULT to reset the parameter to its default
679
      value
680
  @type use_default: boolean
681
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
682
      values as 'to be deleted' values
683
  @type use_none: boolean
684
  @param use_none: whether to recognise C{None} values as 'to be
685
      deleted' values
686
  @rtype: dict
687
  @return: the new parameter dictionary
688

689
  """
690
  params_copy = copy.deepcopy(old_params)
691
  for key, val in update_dict.iteritems():
692
    if ((use_default and val == constants.VALUE_DEFAULT) or
693
        (use_none and val is None)):
694
      try:
695
        del params_copy[key]
696
      except KeyError:
697
        pass
698
    else:
699
      params_copy[key] = val
700
  return params_copy
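# A minimal sketch of the merge semantics, using made-up parameter names;
# with the default flags, L{constants.VALUE_DEFAULT} drops a key from the
# copy while other values overwrite or extend it:
#
#   >>> _GetUpdatedParams({"kernel_path": "/boot/vmlinuz", "acpi": True},
#   ...                   {"acpi": constants.VALUE_DEFAULT, "pae": False})
#   {'kernel_path': '/boot/vmlinuz', 'pae': False}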
701

    
702

    
703
def _ReleaseLocks(lu, level, names=None, keep=None):
704
  """Releases locks owned by an LU.
705

706
  @type lu: L{LogicalUnit}
707
  @param level: Lock level
708
  @type names: list or None
709
  @param names: Names of locks to release
710
  @type keep: list or None
711
  @param keep: Names of locks to retain
712

713
  """
714
  assert not (keep is not None and names is not None), \
715
         "Only one of the 'names' and the 'keep' parameters can be given"
716

    
717
  if names is not None:
718
    should_release = names.__contains__
719
  elif keep:
720
    should_release = lambda name: name not in keep
721
  else:
722
    should_release = None
723

    
724
  if should_release:
725
    retain = []
726
    release = []
727

    
728
    # Determine which locks to release
729
    for name in lu.owned_locks(level):
730
      if should_release(name):
731
        release.append(name)
732
      else:
733
        retain.append(name)
734

    
735
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
736

    
737
    # Release just some locks
738
    lu.glm.release(level, names=release)
739

    
740
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
741
  else:
742
    # Release everything
743
    lu.glm.release(level)
744

    
745
    assert not lu.glm.is_owned(level), "No locks should be owned"
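# A minimal usage sketch (hypothetical LU code; "instance" is assumed to be
# an L{objects.Instance} looked up earlier): keep only the node locks
# belonging to that instance and release the rest at this level.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=instance.all_nodes)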
746

    
747

    
748
def _MapInstanceDisksToNodes(instances):
749
  """Creates a map from (node, volume) to instance name.
750

751
  @type instances: list of L{objects.Instance}
752
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
753

754
  """
755
  return dict(((node, vol), inst.name)
756
              for inst in instances
757
              for (node, vols) in inst.MapLVsByNode().items()
758
              for vol in vols)
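# A minimal sketch of the resulting mapping, with made-up node, volume and
# instance names:
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}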
759

    
760

    
761
def _RunPostHook(lu, node_name):
762
  """Runs the post-hook for an opcode on a single node.
763

764
  """
765
  hm = lu.proc.BuildHooksManager(lu)
766
  try:
767
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
768
  except:
769
    # pylint: disable=W0702
770
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
771

    
772

    
773
def _CheckOutputFields(static, dynamic, selected):
774
  """Checks whether all selected fields are valid.
775

776
  @type static: L{utils.FieldSet}
777
  @param static: static fields set
778
  @type dynamic: L{utils.FieldSet}
779
  @param dynamic: dynamic fields set
780

781
  """
782
  f = utils.FieldSet()
783
  f.Extend(static)
784
  f.Extend(dynamic)
785

    
786
  delta = f.NonMatching(selected)
787
  if delta:
788
    raise errors.OpPrereqError("Unknown output fields selected: %s"
789
                               % ",".join(delta), errors.ECODE_INVAL)
790

    
791

    
792
def _CheckGlobalHvParams(params):
793
  """Validates that given hypervisor params are not global ones.
794

795
  This will ensure that instances don't get customised versions of
796
  global params.
797

798
  """
799
  used_globals = constants.HVC_GLOBALS.intersection(params)
800
  if used_globals:
801
    msg = ("The following hypervisor parameters are global and cannot"
802
           " be customized at instance level, please modify them at"
803
           " cluster level: %s" % utils.CommaJoin(used_globals))
804
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
805

    
806

    
807
def _CheckNodeOnline(lu, node, msg=None):
808
  """Ensure that a given node is online.
809

810
  @param lu: the LU on behalf of which we make the check
811
  @param node: the node to check
812
  @param msg: if passed, should be a message to replace the default one
813
  @raise errors.OpPrereqError: if the node is offline
814

815
  """
816
  if msg is None:
817
    msg = "Can't use offline node"
818
  if lu.cfg.GetNodeInfo(node).offline:
819
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
820

    
821

    
822
def _CheckNodeNotDrained(lu, node):
823
  """Ensure that a given node is not drained.
824

825
  @param lu: the LU on behalf of which we make the check
826
  @param node: the node to check
827
  @raise errors.OpPrereqError: if the node is drained
828

829
  """
830
  if lu.cfg.GetNodeInfo(node).drained:
831
    raise errors.OpPrereqError("Can't use drained node %s" % node,
832
                               errors.ECODE_STATE)
833

    
834

    
835
def _CheckNodeVmCapable(lu, node):
836
  """Ensure that a given node is vm capable.
837

838
  @param lu: the LU on behalf of which we make the check
839
  @param node: the node to check
840
  @raise errors.OpPrereqError: if the node is not vm capable
841

842
  """
843
  if not lu.cfg.GetNodeInfo(node).vm_capable:
844
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
845
                               errors.ECODE_STATE)
846

    
847

    
848
def _CheckNodeHasOS(lu, node, os_name, force_variant):
849
  """Ensure that a node supports a given OS.
850

851
  @param lu: the LU on behalf of which we make the check
852
  @param node: the node to check
853
  @param os_name: the OS to query about
854
  @param force_variant: whether to ignore variant errors
855
  @raise errors.OpPrereqError: if the node is not supporting the OS
856

857
  """
858
  result = lu.rpc.call_os_get(node, os_name)
859
  result.Raise("OS '%s' not in supported OS list for node %s" %
860
               (os_name, node),
861
               prereq=True, ecode=errors.ECODE_INVAL)
862
  if not force_variant:
863
    _CheckOSVariant(result.payload, os_name)
864

    
865

    
866
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
867
  """Ensure that a node has the given secondary ip.
868

869
  @type lu: L{LogicalUnit}
870
  @param lu: the LU on behalf of which we make the check
871
  @type node: string
872
  @param node: the node to check
873
  @type secondary_ip: string
874
  @param secondary_ip: the ip to check
875
  @type prereq: boolean
876
  @param prereq: whether to throw a prerequisite or an execute error
877
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
878
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
879

880
  """
881
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
882
  result.Raise("Failure checking secondary ip on node %s" % node,
883
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
884
  if not result.payload:
885
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
886
           " please fix and re-run this command" % secondary_ip)
887
    if prereq:
888
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
889
    else:
890
      raise errors.OpExecError(msg)
891

    
892

    
893
def _GetClusterDomainSecret():
894
  """Reads the cluster domain secret.
895

896
  """
897
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
898
                               strict=True)
899

    
900

    
901
def _CheckInstanceDown(lu, instance, reason):
902
  """Ensure that an instance is not running."""
903
  if instance.admin_up:
904
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
905
                               (instance.name, reason), errors.ECODE_STATE)
906

    
907
  pnode = instance.primary_node
908
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
909
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
910
              prereq=True, ecode=errors.ECODE_ENVIRON)
911

    
912
  if instance.name in ins_l.payload:
913
    raise errors.OpPrereqError("Instance %s is running, %s" %
914
                               (instance.name, reason), errors.ECODE_STATE)
915

    
916

    
917
def _ExpandItemName(fn, name, kind):
918
  """Expand an item name.
919

920
  @param fn: the function to use for expansion
921
  @param name: requested item name
922
  @param kind: text description ('Node' or 'Instance')
923
  @return: the resolved (full) name
924
  @raise errors.OpPrereqError: if the item is not found
925

926
  """
927
  full_name = fn(name)
928
  if full_name is None:
929
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
930
                               errors.ECODE_NOENT)
931
  return full_name
932

    
933

    
934
def _ExpandNodeName(cfg, name):
935
  """Wrapper over L{_ExpandItemName} for nodes."""
936
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
937

    
938

    
939
def _ExpandInstanceName(cfg, name):
940
  """Wrapper over L{_ExpandItemName} for instance."""
941
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
942

    
943

    
944
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
945
                          memory, vcpus, nics, disk_template, disks,
946
                          bep, hvp, hypervisor_name, tags):
947
  """Builds instance related env variables for hooks
948

949
  This builds the hook environment from individual variables.
950

951
  @type name: string
952
  @param name: the name of the instance
953
  @type primary_node: string
954
  @param primary_node: the name of the instance's primary node
955
  @type secondary_nodes: list
956
  @param secondary_nodes: list of secondary nodes as strings
957
  @type os_type: string
958
  @param os_type: the name of the instance's OS
959
  @type status: boolean
960
  @param status: the should_run status of the instance
961
  @type memory: string
962
  @param memory: the memory size of the instance
963
  @type vcpus: string
964
  @param vcpus: the count of VCPUs the instance has
965
  @type nics: list
966
  @param nics: list of tuples (ip, mac, mode, link) representing
967
      the NICs the instance has
968
  @type disk_template: string
969
  @param disk_template: the disk template of the instance
970
  @type disks: list
971
  @param disks: the list of (size, mode) pairs
972
  @type bep: dict
973
  @param bep: the backend parameters for the instance
974
  @type hvp: dict
975
  @param hvp: the hypervisor parameters for the instance
976
  @type hypervisor_name: string
977
  @param hypervisor_name: the hypervisor for the instance
978
  @type tags: list
979
  @param tags: list of instance tags as strings
980
  @rtype: dict
981
  @return: the hook environment for this instance
982

983
  """
984
  if status:
985
    str_status = "up"
986
  else:
987
    str_status = "down"
988
  env = {
989
    "OP_TARGET": name,
990
    "INSTANCE_NAME": name,
991
    "INSTANCE_PRIMARY": primary_node,
992
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
993
    "INSTANCE_OS_TYPE": os_type,
994
    "INSTANCE_STATUS": str_status,
995
    "INSTANCE_MEMORY": memory,
996
    "INSTANCE_VCPUS": vcpus,
997
    "INSTANCE_DISK_TEMPLATE": disk_template,
998
    "INSTANCE_HYPERVISOR": hypervisor_name,
999
  }
1000

    
1001
  if nics:
1002
    nic_count = len(nics)
1003
    for idx, (ip, mac, mode, link) in enumerate(nics):
1004
      if ip is None:
1005
        ip = ""
1006
      env["INSTANCE_NIC%d_IP" % idx] = ip
1007
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1008
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1009
      env["INSTANCE_NIC%d_LINK" % idx] = link
1010
      if mode == constants.NIC_MODE_BRIDGED:
1011
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1012
  else:
1013
    nic_count = 0
1014

    
1015
  env["INSTANCE_NIC_COUNT"] = nic_count
1016

    
1017
  if disks:
1018
    disk_count = len(disks)
1019
    for idx, (size, mode) in enumerate(disks):
1020
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1021
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1022
  else:
1023
    disk_count = 0
1024

    
1025
  env["INSTANCE_DISK_COUNT"] = disk_count
1026

    
1027
  if not tags:
1028
    tags = []
1029

    
1030
  env["INSTANCE_TAGS"] = " ".join(tags)
1031

    
1032
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1033
    for key, value in source.items():
1034
      env["INSTANCE_%s_%s" % (kind, key)] = value
1035

    
1036
  return env
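# A minimal sketch of the resulting environment for an instance with one
# NIC and one disk (all values made up); the hooks runner later prefixes
# each key with "GANETI_":
#
#   {"OP_TARGET": "inst1.example.com",
#    "INSTANCE_NAME": "inst1.example.com",
#    "INSTANCE_PRIMARY": "node1.example.com",
#    "INSTANCE_STATUS": "up",
#    "INSTANCE_NIC_COUNT": 1,
#    "INSTANCE_NIC0_MODE": constants.NIC_MODE_BRIDGED,
#    "INSTANCE_DISK_COUNT": 1,
#    "INSTANCE_DISK0_SIZE": 10240,
#    ...}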
1037

    
1038

    
1039
def _NICListToTuple(lu, nics):
1040
  """Build a list of nic information tuples.
1041

1042
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1043
  value in LUInstanceQueryData.
1044

1045
  @type lu:  L{LogicalUnit}
1046
  @param lu: the logical unit on whose behalf we execute
1047
  @type nics: list of L{objects.NIC}
1048
  @param nics: list of nics to convert to hooks tuples
1049

1050
  """
1051
  hooks_nics = []
1052
  cluster = lu.cfg.GetClusterInfo()
1053
  for nic in nics:
1054
    ip = nic.ip
1055
    mac = nic.mac
1056
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1057
    mode = filled_params[constants.NIC_MODE]
1058
    link = filled_params[constants.NIC_LINK]
1059
    hooks_nics.append((ip, mac, mode, link))
1060
  return hooks_nics
1061

    
1062

    
1063
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1064
  """Builds instance related env variables for hooks from an object.
1065

1066
  @type lu: L{LogicalUnit}
1067
  @param lu: the logical unit on whose behalf we execute
1068
  @type instance: L{objects.Instance}
1069
  @param instance: the instance for which we should build the
1070
      environment
1071
  @type override: dict
1072
  @param override: dictionary with key/values that will override
1073
      our values
1074
  @rtype: dict
1075
  @return: the hook environment dictionary
1076

1077
  """
1078
  cluster = lu.cfg.GetClusterInfo()
1079
  bep = cluster.FillBE(instance)
1080
  hvp = cluster.FillHV(instance)
1081
  args = {
1082
    "name": instance.name,
1083
    "primary_node": instance.primary_node,
1084
    "secondary_nodes": instance.secondary_nodes,
1085
    "os_type": instance.os,
1086
    "status": instance.admin_up,
1087
    "memory": bep[constants.BE_MEMORY],
1088
    "vcpus": bep[constants.BE_VCPUS],
1089
    "nics": _NICListToTuple(lu, instance.nics),
1090
    "disk_template": instance.disk_template,
1091
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1092
    "bep": bep,
1093
    "hvp": hvp,
1094
    "hypervisor_name": instance.hypervisor,
1095
    "tags": instance.tags,
1096
  }
1097
  if override:
1098
    args.update(override)
1099
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1100

    
1101

    
1102
def _AdjustCandidatePool(lu, exceptions):
1103
  """Adjust the candidate pool after node operations.
1104

1105
  """
1106
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1107
  if mod_list:
1108
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1109
               utils.CommaJoin(node.name for node in mod_list))
1110
    for name in mod_list:
1111
      lu.context.ReaddNode(name)
1112
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1113
  if mc_now > mc_max:
1114
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1115
               (mc_now, mc_max))
1116

    
1117

    
1118
def _DecideSelfPromotion(lu, exceptions=None):
1119
  """Decide whether I should promote myself as a master candidate.
1120

1121
  """
1122
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1123
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1124
  # the new node will increase mc_max by one, so:
1125
  mc_should = min(mc_should + 1, cp_size)
1126
  return mc_now < mc_should
1127

    
1128

    
1129
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1130
  """Check that the bridges needed by a list of nics exist.
1131

1132
  """
1133
  cluster = lu.cfg.GetClusterInfo()
1134
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1135
  brlist = [params[constants.NIC_LINK] for params in paramslist
1136
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1137
  if brlist:
1138
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1139
    result.Raise("Error checking bridges on destination node '%s'" %
1140
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1141

    
1142

    
1143
def _CheckInstanceBridgesExist(lu, instance, node=None):
1144
  """Check that the bridges needed by an instance exist.
1145

1146
  """
1147
  if node is None:
1148
    node = instance.primary_node
1149
  _CheckNicsBridgesExist(lu, instance.nics, node)
1150

    
1151

    
1152
def _CheckOSVariant(os_obj, name):
1153
  """Check whether an OS name conforms to the os variants specification.
1154

1155
  @type os_obj: L{objects.OS}
1156
  @param os_obj: OS object to check
1157
  @type name: string
1158
  @param name: OS name passed by the user, to check for validity
1159

1160
  """
1161
  variant = objects.OS.GetVariant(name)
1162
  if not os_obj.supported_variants:
1163
    if variant:
1164
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1165
                                 " passed)" % (os_obj.name, variant),
1166
                                 errors.ECODE_INVAL)
1167
    return
1168
  if not variant:
1169
    raise errors.OpPrereqError("OS name must include a variant",
1170
                               errors.ECODE_INVAL)
1171

    
1172
  if variant not in os_obj.supported_variants:
1173
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1174

    
1175

    
1176
def _GetNodeInstancesInner(cfg, fn):
1177
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1178

    
1179

    
1180
def _GetNodeInstances(cfg, node_name):
1181
  """Returns a list of all primary and secondary instances on a node.
1182

1183
  """
1184

    
1185
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1186

    
1187

    
1188
def _GetNodePrimaryInstances(cfg, node_name):
1189
  """Returns primary instances on a node.
1190

1191
  """
1192
  return _GetNodeInstancesInner(cfg,
1193
                                lambda inst: node_name == inst.primary_node)
1194

    
1195

    
1196
def _GetNodeSecondaryInstances(cfg, node_name):
1197
  """Returns secondary instances on a node.
1198

1199
  """
1200
  return _GetNodeInstancesInner(cfg,
1201
                                lambda inst: node_name in inst.secondary_nodes)
1202

    
1203

    
1204
def _GetStorageTypeArgs(cfg, storage_type):
1205
  """Returns the arguments for a storage type.
1206

1207
  """
1208
  # Special case for file storage
1209
  if storage_type == constants.ST_FILE:
1210
    # storage.FileStorage wants a list of storage directories
1211
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1212

    
1213
  return []
1214

    
1215

    
1216
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1217
  faulty = []
1218

    
1219
  for dev in instance.disks:
1220
    cfg.SetDiskID(dev, node_name)
1221

    
1222
  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1223
  result.Raise("Failed to get disk status from node %s" % node_name,
1224
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1225

    
1226
  for idx, bdev_status in enumerate(result.payload):
1227
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1228
      faulty.append(idx)
1229

    
1230
  return faulty
1231

    
1232

    
1233
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1234
  """Check the sanity of iallocator and node arguments and use the
1235
  cluster-wide iallocator if appropriate.
1236

1237
  Check that at most one of (iallocator, node) is specified. If none is
1238
  specified, then the LU's opcode's iallocator slot is filled with the
1239
  cluster-wide default iallocator.
1240

1241
  @type iallocator_slot: string
1242
  @param iallocator_slot: the name of the opcode iallocator slot
1243
  @type node_slot: string
1244
  @param node_slot: the name of the opcode target node slot
1245

1246
  """
1247
  node = getattr(lu.op, node_slot, None)
1248
  iallocator = getattr(lu.op, iallocator_slot, None)
1249

    
1250
  if node is not None and iallocator is not None:
1251
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1252
                               errors.ECODE_INVAL)
1253
  elif node is None and iallocator is None:
1254
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1255
    if default_iallocator:
1256
      setattr(lu.op, iallocator_slot, default_iallocator)
1257
    else:
1258
      raise errors.OpPrereqError("No iallocator or node given and no"
1259
                                 " cluster-wide default iallocator found;"
1260
                                 " please specify either an iallocator or a"
1261
                                 " node, or set a cluster-wide default"
1262
                                 " iallocator")
1263
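# A minimal usage sketch from a hypothetical LU's CheckArguments, assuming
# the opcode carries "iallocator" and "pnode" slots:
#
#   def CheckArguments(self):
#     _CheckIAllocatorOrNode(self, "iallocator", "pnode")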

    
1264

    
1265
def _GetDefaultIAllocator(cfg, iallocator):
1266
  """Decides on which iallocator to use.
1267

1268
  @type cfg: L{config.ConfigWriter}
1269
  @param cfg: Cluster configuration object
1270
  @type iallocator: string or None
1271
  @param iallocator: Iallocator specified in opcode
1272
  @rtype: string
1273
  @return: Iallocator name
1274

1275
  """
1276
  if not iallocator:
1277
    # Use default iallocator
1278
    iallocator = cfg.GetDefaultIAllocator()
1279

    
1280
  if not iallocator:
1281
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1282
                               " opcode nor as a cluster-wide default",
1283
                               errors.ECODE_INVAL)
1284

    
1285
  return iallocator
1286

    
1287

    
1288
class LUClusterPostInit(LogicalUnit):
1289
  """Logical unit for running hooks after cluster initialization.
1290

1291
  """
1292
  HPATH = "cluster-init"
1293
  HTYPE = constants.HTYPE_CLUSTER
1294

    
1295
  def BuildHooksEnv(self):
1296
    """Build hooks env.
1297

1298
    """
1299
    return {
1300
      "OP_TARGET": self.cfg.GetClusterName(),
1301
      }
1302

    
1303
  def BuildHooksNodes(self):
1304
    """Build hooks nodes.
1305

1306
    """
1307
    return ([], [self.cfg.GetMasterNode()])
1308

    
1309
  def Exec(self, feedback_fn):
1310
    """Nothing to do.
1311

1312
    """
1313
    return True
1314

    
1315

    
1316
class LUClusterDestroy(LogicalUnit):
1317
  """Logical unit for destroying the cluster.
1318

1319
  """
1320
  HPATH = "cluster-destroy"
1321
  HTYPE = constants.HTYPE_CLUSTER
1322

    
1323
  def BuildHooksEnv(self):
1324
    """Build hooks env.
1325

1326
    """
1327
    return {
1328
      "OP_TARGET": self.cfg.GetClusterName(),
1329
      }
1330

    
1331
  def BuildHooksNodes(self):
1332
    """Build hooks nodes.
1333

1334
    """
1335
    return ([], [])
1336

    
1337
  def CheckPrereq(self):
1338
    """Check prerequisites.
1339

1340
    This checks whether the cluster is empty.
1341

1342
    Any errors are signaled by raising errors.OpPrereqError.
1343

1344
    """
1345
    master = self.cfg.GetMasterNode()
1346

    
1347
    nodelist = self.cfg.GetNodeList()
1348
    if len(nodelist) != 1 or nodelist[0] != master:
1349
      raise errors.OpPrereqError("There are still %d node(s) in"
1350
                                 " this cluster." % (len(nodelist) - 1),
1351
                                 errors.ECODE_INVAL)
1352
    instancelist = self.cfg.GetInstanceList()
1353
    if instancelist:
1354
      raise errors.OpPrereqError("There are still %d instance(s) in"
1355
                                 " this cluster." % len(instancelist),
1356
                                 errors.ECODE_INVAL)
1357

    
1358
  def Exec(self, feedback_fn):
1359
    """Destroys the cluster.
1360

1361
    """
1362
    master_params = self.cfg.GetMasterNetworkParameters()
1363

    
1364
    # Run post hooks on master node before it's removed
1365
    _RunPostHook(self, master_params.name)
1366

    
1367
    ems = self.cfg.GetUseExternalMipScript()
1368
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1369
                                                     master_params, ems)
1370
    result.Raise("Could not disable the master role")
1371

    
1372
    return master_params.name
1373

    
1374

    
1375
def _VerifyCertificate(filename):
1376
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1377

1378
  @type filename: string
1379
  @param filename: Path to PEM file
1380

1381
  """
1382
  try:
1383
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1384
                                           utils.ReadFile(filename))
1385
  except Exception, err: # pylint: disable=W0703
1386
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1387
            "Failed to load X509 certificate %s: %s" % (filename, err))
1388

    
1389
  (errcode, msg) = \
1390
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1391
                                constants.SSL_CERT_EXPIRATION_ERROR)
1392

    
1393
  if msg:
1394
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1395
  else:
1396
    fnamemsg = None
1397

    
1398
  if errcode is None:
1399
    return (None, fnamemsg)
1400
  elif errcode == utils.CERT_WARNING:
1401
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1402
  elif errcode == utils.CERT_ERROR:
1403
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1404

    
1405
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1406

    
1407

    
1408
def _GetAllHypervisorParameters(cluster, instances):
1409
  """Compute the set of all hypervisor parameters.
1410

1411
  @type cluster: L{objects.Cluster}
1412
  @param cluster: the cluster object
1413
  @type instances: list of L{objects.Instance}
1414
  @param instances: additional instances from which to obtain parameters
1415
  @rtype: list of (origin, hypervisor, parameters)
1416
  @return: a list with all parameters found, indicating the hypervisor they
1417
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1418

1419
  """
1420
  hvp_data = []
1421

    
1422
  for hv_name in cluster.enabled_hypervisors:
1423
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1424

    
1425
  for os_name, os_hvp in cluster.os_hvp.items():
1426
    for hv_name, hv_params in os_hvp.items():
1427
      if hv_params:
1428
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1429
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1430

    
1431
  # TODO: collapse identical parameter values in a single one
1432
  for instance in instances:
1433
    if instance.hvparams:
1434
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1435
                       cluster.FillHV(instance)))
1436

    
1437
  return hvp_data
1438

    
1439

    
1440
class _VerifyErrors(object):
1441
  """Mix-in for cluster/group verify LUs.
1442

1443
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1444
  self.op and self._feedback_fn to be available.)
1445

1446
  """
1447

    
1448
  ETYPE_FIELD = "code"
1449
  ETYPE_ERROR = "ERROR"
1450
  ETYPE_WARNING = "WARNING"
1451

    
1452
  def _Error(self, ecode, item, msg, *args, **kwargs):
1453
    """Format an error message.
1454

1455
    Based on the opcode's error_codes parameter, either format a
1456
    parseable error code, or a simpler error string.
1457

1458
    This must be called only from Exec and functions called from Exec.
1459

1460
    """
1461
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1462
    itype, etxt, _ = ecode
1463
    # first complete the msg
1464
    if args:
1465
      msg = msg % args
1466
    # then format the whole message
1467
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1468
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1469
    else:
1470
      if item:
1471
        item = " " + item
1472
      else:
1473
        item = ""
1474
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1475
    # and finally report it via the feedback_fn
1476
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1477

    
1478
  def _ErrorIf(self, cond, ecode, *args, **kwargs):
1479
    """Log an error message if the passed condition is True.
1480

1481
    """
1482
    cond = (bool(cond)
1483
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1484

    
1485
    # If the error code is in the list of ignored errors, demote the error to a
1486
    # warning
1487
    (_, etxt, _) = ecode
1488
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1489
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1490

    
1491
    if cond:
1492
      self._Error(ecode, *args, **kwargs)
1493

    
1494
    # do not mark the operation as failed for WARN cases only
1495
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1496
      self.bad = self.bad or cond
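  # A minimal usage sketch (the condition and node_name are made up); the
  # item argument may be None for cluster-wide checks, as in the calls
  # further down:
  #
  #   self._ErrorIf(test, constants.CV_ECLUSTERCFG, None,
  #                 "configuration problem on %s", node_name)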
1497

    
1498

    
1499
class LUClusterVerify(NoHooksLU):
1500
  """Submits all jobs necessary to verify the cluster.
1501

1502
  """
1503
  REQ_BGL = False
1504

    
1505
  def ExpandNames(self):
1506
    self.needed_locks = {}
1507

    
1508
  def Exec(self, feedback_fn):
1509
    jobs = []
1510

    
1511
    if self.op.group_name:
1512
      groups = [self.op.group_name]
1513
      depends_fn = lambda: None
1514
    else:
1515
      groups = self.cfg.GetNodeGroupList()
1516

    
1517
      # Verify global configuration
1518
      jobs.append([
1519
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1520
        ])
1521

    
1522
      # Always depend on global verification
1523
      depends_fn = lambda: [(-len(jobs), [])]
1524

    
1525
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1526
                                            ignore_errors=self.op.ignore_errors,
1527
                                            depends=depends_fn())]
1528
                for group in groups)
1529

    
1530
    # Fix up all parameters
1531
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1532
      op.debug_simulate_errors = self.op.debug_simulate_errors
1533
      op.verbose = self.op.verbose
1534
      op.error_codes = self.op.error_codes
1535
      try:
1536
        op.skip_checks = self.op.skip_checks
1537
      except AttributeError:
1538
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1539

    
1540
    return ResultWithJobs(jobs)
1541

    
1542

    
1543
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1544
  """Verifies the cluster config.
1545

1546
  """
1547
  REQ_BGL = True
1548

    
1549
  def _VerifyHVP(self, hvp_data):
1550
    """Verifies locally the syntax of the hypervisor parameters.
1551

1552
    """
1553
    for item, hv_name, hv_params in hvp_data:
1554
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1555
             (hv_name, item))
1556
      try:
1557
        hv_class = hypervisor.GetHypervisor(hv_name)
1558
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1559
        hv_class.CheckParameterSyntax(hv_params)
1560
      except errors.GenericError, err:
1561
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
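    # Each hvp_data entry is a (source description, hypervisor name,
    # parameter dict) tuple; a purely hypothetical example would be
    #   ("cluster", "xen-pvm", {"kernel_path": "/boot/vmlinuz-2.6-xenU"})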
1562

    
1563
  def ExpandNames(self):
1564
    # Information can be safely retrieved as the BGL is acquired in exclusive
1565
    # mode
1566
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1567
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1568
    self.all_node_info = self.cfg.GetAllNodesInfo()
1569
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1570
    self.needed_locks = {}
1571

    
1572
  def Exec(self, feedback_fn):
1573
    """Verify integrity of cluster, performing various test on nodes.
1574

1575
    """
1576
    self.bad = False
1577
    self._feedback_fn = feedback_fn
1578

    
1579
    feedback_fn("* Verifying cluster config")
1580

    
1581
    for msg in self.cfg.VerifyConfig():
1582
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1583

    
1584
    feedback_fn("* Verifying cluster certificate files")
1585

    
1586
    for cert_filename in constants.ALL_CERT_FILES:
1587
      (errcode, msg) = _VerifyCertificate(cert_filename)
1588
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1589

    
1590
    feedback_fn("* Verifying hypervisor parameters")
1591

    
1592
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1593
                                                self.all_inst_info.values()))
1594

    
1595
    feedback_fn("* Verifying all nodes belong to an existing group")
1596

    
1597
    # We do this verification here because, should this bogus circumstance
1598
    # occur, it would never be caught by VerifyGroup, which only acts on
1599
    # nodes/instances reachable from existing node groups.
1600

    
1601
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1602
                         if node.group not in self.all_group_info)
1603

    
1604
    dangling_instances = {}
1605
    no_node_instances = []
1606

    
1607
    for inst in self.all_inst_info.values():
1608
      if inst.primary_node in dangling_nodes:
1609
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1610
      elif inst.primary_node not in self.all_node_info:
1611
        no_node_instances.append(inst.name)
1612

    
1613
    pretty_dangling = [
1614
        "%s (%s)" %
1615
        (node.name,
1616
         utils.CommaJoin(dangling_instances.get(node.name,
1617
                                                ["no instances"])))
1618
        for node in dangling_nodes]
1619

    
1620
    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1621
                  None,
1622
                  "the following nodes (and their instances) belong to a non"
1623
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1624

    
1625
    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1626
                  None,
1627
                  "the following instances have a non-existing primary-node:"
1628
                  " %s", utils.CommaJoin(no_node_instances))
1629

    
1630
    return not self.bad
1631

    
1632

    
1633
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1634
  """Verifies the status of a node group.
1635

1636
  """
1637
  HPATH = "cluster-verify"
1638
  HTYPE = constants.HTYPE_CLUSTER
1639
  REQ_BGL = False
1640

    
1641
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1642

    
1643
  class NodeImage(object):
1644
    """A class representing the logical and physical status of a node.
1645

1646
    @type name: string
1647
    @ivar name: the node name to which this object refers
1648
    @ivar volumes: a structure as returned from
1649
        L{ganeti.backend.GetVolumeList} (runtime)
1650
    @ivar instances: a list of running instances (runtime)
1651
    @ivar pinst: list of configured primary instances (config)
1652
    @ivar sinst: list of configured secondary instances (config)
1653
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1654
        instances for which this node is secondary (config)
1655
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1656
    @ivar dfree: free disk, as reported by the node (runtime)
1657
    @ivar offline: the offline status (config)
1658
    @type rpc_fail: boolean
1659
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1660
        not whether the individual keys were correct) (runtime)
1661
    @type lvm_fail: boolean
1662
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1663
    @type hyp_fail: boolean
1664
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1665
    @type ghost: boolean
1666
    @ivar ghost: whether this is a known node or not (config)
1667
    @type os_fail: boolean
1668
    @ivar os_fail: whether the RPC call didn't return valid OS data
1669
    @type oslist: list
1670
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1671
    @type vm_capable: boolean
1672
    @ivar vm_capable: whether the node can host instances
1673

1674
    """
1675
    def __init__(self, offline=False, name=None, vm_capable=True):
1676
      self.name = name
1677
      self.volumes = {}
1678
      self.instances = []
1679
      self.pinst = []
1680
      self.sinst = []
1681
      self.sbp = {}
1682
      self.mfree = 0
1683
      self.dfree = 0
1684
      self.offline = offline
1685
      self.vm_capable = vm_capable
1686
      self.rpc_fail = False
1687
      self.lvm_fail = False
1688
      self.hyp_fail = False
1689
      self.ghost = False
1690
      self.os_fail = False
1691
      self.oslist = {}
1692

    
1693
  def ExpandNames(self):
1694
    # This raises errors.OpPrereqError on its own:
1695
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1696

    
1697
    # Get instances in node group; this is unsafe and needs verification later
1698
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1699

    
1700
    self.needed_locks = {
1701
      locking.LEVEL_INSTANCE: inst_names,
1702
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1703
      locking.LEVEL_NODE: [],
1704
      }
1705

    
1706
    self.share_locks = _ShareAll()
1707

    
1708
  def DeclareLocks(self, level):
1709
    if level == locking.LEVEL_NODE:
1710
      # Get members of node group; this is unsafe and needs verification later
1711
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1712

    
1713
      all_inst_info = self.cfg.GetAllInstancesInfo()
1714

    
1715
      # In Exec(), we warn about mirrored instances that have primary and
1716
      # secondary living in separate node groups. To fully verify that
1717
      # volumes for these instances are healthy, we will need to do an
1718
      # extra call to their secondaries. We ensure here those nodes will
1719
      # be locked.
1720
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1721
        # Important: access only the instances whose lock is owned
1722
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1723
          nodes.update(all_inst_info[inst].secondary_nodes)
1724

    
1725
      self.needed_locks[locking.LEVEL_NODE] = nodes
1726

    
1727
  def CheckPrereq(self):
1728
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1729
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1730

    
1731
    group_nodes = set(self.group_info.members)
1732
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1733

    
1734
    unlocked_nodes = \
1735
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1736

    
1737
    unlocked_instances = \
1738
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1739

    
1740
    if unlocked_nodes:
1741
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1742
                                 utils.CommaJoin(unlocked_nodes))
1743

    
1744
    if unlocked_instances:
1745
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1746
                                 utils.CommaJoin(unlocked_instances))
1747

    
1748
    self.all_node_info = self.cfg.GetAllNodesInfo()
1749
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1750

    
1751
    self.my_node_names = utils.NiceSort(group_nodes)
1752
    self.my_inst_names = utils.NiceSort(group_instances)
1753

    
1754
    self.my_node_info = dict((name, self.all_node_info[name])
1755
                             for name in self.my_node_names)
1756

    
1757
    self.my_inst_info = dict((name, self.all_inst_info[name])
1758
                             for name in self.my_inst_names)
1759

    
1760
    # We detect here the nodes that will need the extra RPC calls for verifying
1761
    # split LV volumes; they should be locked.
1762
    extra_lv_nodes = set()
1763

    
1764
    for inst in self.my_inst_info.values():
1765
      if inst.disk_template in constants.DTS_INT_MIRROR:
1766
        group = self.my_node_info[inst.primary_node].group
1767
        for nname in inst.secondary_nodes:
1768
          if self.all_node_info[nname].group != group:
1769
            extra_lv_nodes.add(nname)
1770

    
1771
    unlocked_lv_nodes = \
1772
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1773

    
1774
    if unlocked_lv_nodes:
1775
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1776
                                 utils.CommaJoin(unlocked_lv_nodes))
1777
    self.extra_lv_nodes = list(extra_lv_nodes)
1778

    
1779
  def _VerifyNode(self, ninfo, nresult):
1780
    """Perform some basic validation on data returned from a node.
1781

1782
      - check the result data structure is well formed and has all the
1783
        mandatory fields
1784
      - check ganeti version
1785

1786
    @type ninfo: L{objects.Node}
1787
    @param ninfo: the node to check
1788
    @param nresult: the results from the node
1789
    @rtype: boolean
1790
    @return: whether overall this call was successful (and we can expect
1791
         reasonable values in the response)
1792

1793
    """
1794
    node = ninfo.name
1795
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1796

    
1797
    # main result, nresult should be a non-empty dict
1798
    test = not nresult or not isinstance(nresult, dict)
1799
    _ErrorIf(test, constants.CV_ENODERPC, node,
1800
                  "unable to verify node: no data returned")
1801
    if test:
1802
      return False
1803

    
1804
    # compares ganeti version
1805
    local_version = constants.PROTOCOL_VERSION
1806
    remote_version = nresult.get("version", None)
1807
    test = not (remote_version and
1808
                isinstance(remote_version, (list, tuple)) and
1809
                len(remote_version) == 2)
1810
    _ErrorIf(test, constants.CV_ENODERPC, node,
1811
             "connection to node returned invalid data")
1812
    if test:
1813
      return False
1814

    
1815
    test = local_version != remote_version[0]
1816
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
1817
             "incompatible protocol versions: master %s,"
1818
             " node %s", local_version, remote_version[0])
1819
    if test:
1820
      return False
1821

    
1822
    # node seems compatible, we can actually try to look into its results
1823

    
1824
    # full package version
1825
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1826
                  constants.CV_ENODEVERSION, node,
1827
                  "software version mismatch: master %s, node %s",
1828
                  constants.RELEASE_VERSION, remote_version[1],
1829
                  code=self.ETYPE_WARNING)
1830

    
1831
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1832
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1833
      for hv_name, hv_result in hyp_result.iteritems():
1834
        test = hv_result is not None
1835
        _ErrorIf(test, constants.CV_ENODEHV, node,
1836
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1837

    
1838
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1839
    if ninfo.vm_capable and isinstance(hvp_result, list):
1840
      for item, hv_name, hv_result in hvp_result:
1841
        _ErrorIf(True, constants.CV_ENODEHV, node,
1842
                 "hypervisor %s parameter verify failure (source %s): %s",
1843
                 hv_name, item, hv_result)
1844

    
1845
    test = nresult.get(constants.NV_NODESETUP,
1846
                       ["Missing NODESETUP results"])
1847
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1848
             "; ".join(test))
1849

    
1850
    return True
1851

    
1852
  def _VerifyNodeTime(self, ninfo, nresult,
1853
                      nvinfo_starttime, nvinfo_endtime):
1854
    """Check the node time.
1855

1856
    @type ninfo: L{objects.Node}
1857
    @param ninfo: the node to check
1858
    @param nresult: the remote results for the node
1859
    @param nvinfo_starttime: the start time of the RPC call
1860
    @param nvinfo_endtime: the end time of the RPC call
1861

1862
    """
1863
    node = ninfo.name
1864
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1865

    
1866
    ntime = nresult.get(constants.NV_TIME, None)
1867
    try:
1868
      ntime_merged = utils.MergeTime(ntime)
1869
    except (ValueError, TypeError):
1870
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1871
      return
1872

    
1873
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1874
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1875
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1876
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1877
    else:
1878
      ntime_diff = None
1879

    
1880
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1881
             "Node time diverges by at least %s from master node time",
1882
             ntime_diff)
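    # In effect a node clock is accepted if it lies within the window
    #   [nvinfo_starttime - NODE_MAX_CLOCK_SKEW,
    #    nvinfo_endtime + NODE_MAX_CLOCK_SKEW]
    # and anything outside it is reported with the divergence computed above.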
1883

    
1884
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1885
    """Check the node LVM results.
1886

1887
    @type ninfo: L{objects.Node}
1888
    @param ninfo: the node to check
1889
    @param nresult: the remote results for the node
1890
    @param vg_name: the configured VG name
1891

1892
    """
1893
    if vg_name is None:
1894
      return
1895

    
1896
    node = ninfo.name
1897
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1898

    
1899
    # checks vg existence and size > 20G
1900
    vglist = nresult.get(constants.NV_VGLIST, None)
1901
    test = not vglist
1902
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1903
    if not test:
1904
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1905
                                            constants.MIN_VG_SIZE)
1906
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1907

    
1908
    # check pv names
1909
    pvlist = nresult.get(constants.NV_PVLIST, None)
1910
    test = pvlist is None
1911
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1912
    if not test:
1913
      # check that ':' is not present in PV names, since it's a
1914
      # special character for lvcreate (denotes the range of PEs to
1915
      # use on the PV)
1916
      for _, pvname, owner_vg in pvlist:
1917
        test = ":" in pvname
1918
        _ErrorIf(test, constants.CV_ENODELVM, node,
1919
                 "Invalid character ':' in PV '%s' of VG '%s'",
1920
                 pvname, owner_vg)
1921

    
1922
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1923
    """Check the node bridges.
1924

1925
    @type ninfo: L{objects.Node}
1926
    @param ninfo: the node to check
1927
    @param nresult: the remote results for the node
1928
    @param bridges: the expected list of bridges
1929

1930
    """
1931
    if not bridges:
1932
      return
1933

    
1934
    node = ninfo.name
1935
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1936

    
1937
    missing = nresult.get(constants.NV_BRIDGES, None)
1938
    test = not isinstance(missing, list)
1939
    _ErrorIf(test, constants.CV_ENODENET, node,
1940
             "did not return valid bridge information")
1941
    if not test:
1942
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1943
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1944

    
1945
  def _VerifyNodeUserScripts(self, ninfo, nresult):
1946
    """Check the results of user scripts presence and executability on the node
1947

1948
    @type ninfo: L{objects.Node}
1949
    @param ninfo: the node to check
1950
    @param nresult: the remote results for the node
1951

1952
    """
1953
    node = ninfo.name
1954

    
1955
    test = not constants.NV_USERSCRIPTS in nresult
1956
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
1957
                  "did not return user scripts information")
1958

    
1959
    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
1960
    if not test:
1961
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
1962
                    "user scripts not present or not executable: %s" %
1963
                    utils.CommaJoin(sorted(broken_scripts)))
1964

    
1965
  def _VerifyNodeNetwork(self, ninfo, nresult):
1966
    """Check the node network connectivity results.
1967

1968
    @type ninfo: L{objects.Node}
1969
    @param ninfo: the node to check
1970
    @param nresult: the remote results for the node
1971

1972
    """
1973
    node = ninfo.name
1974
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1975

    
1976
    test = constants.NV_NODELIST not in nresult
1977
    _ErrorIf(test, constants.CV_ENODESSH, node,
1978
             "node hasn't returned node ssh connectivity data")
1979
    if not test:
1980
      if nresult[constants.NV_NODELIST]:
1981
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1982
          _ErrorIf(True, constants.CV_ENODESSH, node,
1983
                   "ssh communication with node '%s': %s", a_node, a_msg)
1984

    
1985
    test = constants.NV_NODENETTEST not in nresult
1986
    _ErrorIf(test, constants.CV_ENODENET, node,
1987
             "node hasn't returned node tcp connectivity data")
1988
    if not test:
1989
      if nresult[constants.NV_NODENETTEST]:
1990
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1991
        for anode in nlist:
1992
          _ErrorIf(True, constants.CV_ENODENET, node,
1993
                   "tcp communication with node '%s': %s",
1994
                   anode, nresult[constants.NV_NODENETTEST][anode])
1995

    
1996
    test = constants.NV_MASTERIP not in nresult
1997
    _ErrorIf(test, constants.CV_ENODENET, node,
1998
             "node hasn't returned node master IP reachability data")
1999
    if not test:
2000
      if not nresult[constants.NV_MASTERIP]:
2001
        if node == self.master_node:
2002
          msg = "the master node cannot reach the master IP (not configured?)"
2003
        else:
2004
          msg = "cannot reach the master IP"
2005
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
2006

    
2007
  def _VerifyInstance(self, instance, instanceconfig, node_image,
2008
                      diskstatus):
2009
    """Verify an instance.
2010

2011
    This function checks to see if the required block devices are
2012
    available on the instance's node.
2013

2014
    """
2015
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2016
    node_current = instanceconfig.primary_node
2017

    
2018
    node_vol_should = {}
2019
    instanceconfig.MapLVsByNode(node_vol_should)
2020

    
2021
    for node in node_vol_should:
2022
      n_img = node_image[node]
2023
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2024
        # ignore missing volumes on offline or broken nodes
2025
        continue
2026
      for volume in node_vol_should[node]:
2027
        test = volume not in n_img.volumes
2028
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2029
                 "volume %s missing on node %s", volume, node)
2030

    
2031
    if instanceconfig.admin_up:
2032
      pri_img = node_image[node_current]
2033
      test = instance not in pri_img.instances and not pri_img.offline
2034
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2035
               "instance not running on its primary node %s",
2036
               node_current)
2037

    
2038
    diskdata = [(nname, success, status, idx)
2039
                for (nname, disks) in diskstatus.items()
2040
                for idx, (success, status) in enumerate(disks)]
2041

    
2042
    for nname, success, bdev_status, idx in diskdata:
2043
      # the 'ghost node' construction in Exec() ensures that we have a
2044
      # node here
2045
      snode = node_image[nname]
2046
      bad_snode = snode.ghost or snode.offline
2047
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2048
               constants.CV_EINSTANCEFAULTYDISK, instance,
2049
               "couldn't retrieve status for disk/%s on %s: %s",
2050
               idx, nname, bdev_status)
2051
      _ErrorIf((instanceconfig.admin_up and success and
2052
                bdev_status.ldisk_status == constants.LDS_FAULTY),
2053
               constants.CV_EINSTANCEFAULTYDISK, instance,
2054
               "disk/%s on %s is faulty", idx, nname)
2055

    
2056
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2057
    """Verify if there are any unknown volumes in the cluster.
2058

2059
    The .os, .swap and backup volumes are ignored. All other volumes are
2060
    reported as unknown.
2061

2062
    @type reserved: L{ganeti.utils.FieldSet}
2063
    @param reserved: a FieldSet of reserved volume names
2064

2065
    """
2066
    for node, n_img in node_image.items():
2067
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2068
        # skip non-healthy nodes
2069
        continue
2070
      for volume in n_img.volumes:
2071
        test = ((node not in node_vol_should or
2072
                volume not in node_vol_should[node]) and
2073
                not reserved.Matches(volume))
2074
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2075
                      "volume %s is unknown", volume)
2076

    
2077
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2078
    """Verify N+1 Memory Resilience.
2079

2080
    Check that if one single node dies we can still start all the
2081
    instances it was primary for.
2082

2083
    """
2084
    cluster_info = self.cfg.GetClusterInfo()
2085
    for node, n_img in node_image.items():
2086
      # This code checks that every node which is now listed as
2087
      # secondary has enough memory to host all instances it is
2088
      # supposed to, should a single other node in the cluster fail.
2089
      # FIXME: not ready for failover to an arbitrary node
2090
      # FIXME: does not support file-backed instances
2091
      # WARNING: we currently take into account down instances as well
2092
      # as up ones, considering that even if they're down someone
2093
      # might want to start them even in the event of a node failure.
2094
      if n_img.offline:
2095
        # we're skipping offline nodes from the N+1 warning, since
2096
        # most likely we don't have good memory information from them;
2097
        # we already list instances living on such nodes, and that's
2098
        # enough warning
2099
        continue
2100
      for prinode, instances in n_img.sbp.items():
2101
        needed_mem = 0
2102
        for instance in instances:
2103
          bep = cluster_info.FillBE(instance_cfg[instance])
2104
          if bep[constants.BE_AUTO_BALANCE]:
2105
            needed_mem += bep[constants.BE_MEMORY]
2106
        test = n_img.mfree < needed_mem
2107
        self._ErrorIf(test, constants.CV_ENODEN1, node,
2108
                      "not enough memory to accomodate instance failovers"
2109
                      " should node %s fail (%dMiB needed, %dMiB available)",
2110
                      prinode, needed_mem, n_img.mfree)
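    # Worked example (hypothetical numbers): if this node is secondary for
    # two auto-balanced instances whose primary is the same node, with
    # BE_MEMORY of 1024 and 2048 MiB, needed_mem is 3072 and an ENODEN1
    # error is raised as soon as the node reports less than 3072 MiB free.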
2111

    
2112
  @classmethod
2113
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2114
                   (files_all, files_opt, files_mc, files_vm)):
2115
    """Verifies file checksums collected from all nodes.
2116

2117
    @param errorif: Callback for reporting errors
2118
    @param nodeinfo: List of L{objects.Node} objects
2119
    @param master_node: Name of master node
2120
    @param all_nvinfo: RPC results
2121

2122
    """
2123
    # Define functions determining which nodes to consider for a file
2124
    files2nodefn = [
2125
      (files_all, None),
2126
      (files_mc, lambda node: (node.master_candidate or
2127
                               node.name == master_node)),
2128
      (files_vm, lambda node: node.vm_capable),
2129
      ]
2130

    
2131
    # Build mapping from filename to list of nodes which should have the file
2132
    nodefiles = {}
2133
    for (files, fn) in files2nodefn:
2134
      if fn is None:
2135
        filenodes = nodeinfo
2136
      else:
2137
        filenodes = filter(fn, nodeinfo)
2138
      nodefiles.update((filename,
2139
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2140
                       for filename in files)
2141

    
2142
    assert set(nodefiles) == (files_all | files_mc | files_vm)
2143

    
2144
    fileinfo = dict((filename, {}) for filename in nodefiles)
2145
    ignore_nodes = set()
2146

    
2147
    for node in nodeinfo:
2148
      if node.offline:
2149
        ignore_nodes.add(node.name)
2150
        continue
2151

    
2152
      nresult = all_nvinfo[node.name]
2153

    
2154
      if nresult.fail_msg or not nresult.payload:
2155
        node_files = None
2156
      else:
2157
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2158

    
2159
      test = not (node_files and isinstance(node_files, dict))
2160
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
2161
              "Node did not return file checksum data")
2162
      if test:
2163
        ignore_nodes.add(node.name)
2164
        continue
2165

    
2166
      # Build per-checksum mapping from filename to nodes having it
2167
      for (filename, checksum) in node_files.items():
2168
        assert filename in nodefiles
2169
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2170

    
2171
    for (filename, checksums) in fileinfo.items():
2172
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2173

    
2174
      # Nodes having the file
2175
      with_file = frozenset(node_name
2176
                            for nodes in fileinfo[filename].values()
2177
                            for node_name in nodes) - ignore_nodes
2178

    
2179
      expected_nodes = nodefiles[filename] - ignore_nodes
2180

    
2181
      # Nodes missing file
2182
      missing_file = expected_nodes - with_file
2183

    
2184
      if filename in files_opt:
2185
        # All or no nodes
2186
        errorif(missing_file and missing_file != expected_nodes,
2187
                constants.CV_ECLUSTERFILECHECK, None,
2188
                "File %s is optional, but it must exist on all or no"
2189
                " nodes (not found on %s)",
2190
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2191
      else:
2192
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2193
                "File %s is missing from node(s) %s", filename,
2194
                utils.CommaJoin(utils.NiceSort(missing_file)))
2195

    
2196
        # Warn if a node has a file it shouldn't
2197
        unexpected = with_file - expected_nodes
2198
        errorif(unexpected,
2199
                constants.CV_ECLUSTERFILECHECK, None,
2200
                "File %s should not exist on node(s) %s",
2201
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2202

    
2203
      # See if there are multiple versions of the file
2204
      test = len(checksums) > 1
2205
      if test:
2206
        variants = ["variant %s on %s" %
2207
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2208
                    for (idx, (checksum, nodes)) in
2209
                      enumerate(sorted(checksums.items()))]
2210
      else:
2211
        variants = []
2212

    
2213
      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2214
              "File %s found with %s different checksums (%s)",
2215
              filename, len(checksums), "; ".join(variants))
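    # For reference, the fileinfo mapping built above has the shape
    #   {filename: {checksum: set([node names])}}
    # so a consistent cluster ends up with exactly one checksum per file
    # (offline nodes excluded).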
2216

    
2217
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2218
                      drbd_map):
2219
    """Verifies and the node DRBD status.
2220

2221
    @type ninfo: L{objects.Node}
2222
    @param ninfo: the node to check
2223
    @param nresult: the remote results for the node
2224
    @param instanceinfo: the dict of instances
2225
    @param drbd_helper: the configured DRBD usermode helper
2226
    @param drbd_map: the DRBD map as returned by
2227
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2228

2229
    """
2230
    node = ninfo.name
2231
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2232

    
2233
    if drbd_helper:
2234
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2235
      test = (helper_result is None)
2236
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2237
               "no drbd usermode helper returned")
2238
      if helper_result:
2239
        status, payload = helper_result
2240
        test = not status
2241
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2242
                 "drbd usermode helper check unsuccessful: %s", payload)
2243
        test = status and (payload != drbd_helper)
2244
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2245
                 "wrong drbd usermode helper: %s", payload)
2246

    
2247
    # compute the DRBD minors
2248
    node_drbd = {}
2249
    for minor, instance in drbd_map[node].items():
2250
      test = instance not in instanceinfo
2251
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2252
               "ghost instance '%s' in temporary DRBD map", instance)
2253
      # ghost instance should not be running, but otherwise we
2254
      # don't give double warnings (both ghost instance and
2255
      # unallocated minor in use)
2256
      if test:
2257
        node_drbd[minor] = (instance, False)
2258
      else:
2259
        instance = instanceinfo[instance]
2260
        node_drbd[minor] = (instance.name, instance.admin_up)
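    # Hypothetical shape of the data handled above:
    #   drbd_map[node] == {0: "inst1", 1: "inst2"}
    # becomes
    #   node_drbd == {0: ("inst1", True), 1: ("inst2", False)}
    # with the boolean taken from each instance's admin_up flag.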
2261

    
2262
    # and now check them
2263
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2264
    test = not isinstance(used_minors, (tuple, list))
2265
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
2266
             "cannot parse drbd status file: %s", str(used_minors))
2267
    if test:
2268
      # we cannot check drbd status
2269
      return
2270

    
2271
    for minor, (iname, must_exist) in node_drbd.items():
2272
      test = minor not in used_minors and must_exist
2273
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2274
               "drbd minor %d of instance %s is not active", minor, iname)
2275
    for minor in used_minors:
2276
      test = minor not in node_drbd
2277
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2278
               "unallocated drbd minor %d is in use", minor)
2279

    
2280
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2281
    """Builds the node OS structures.
2282

2283
    @type ninfo: L{objects.Node}
2284
    @param ninfo: the node to check
2285
    @param nresult: the remote results for the node
2286
    @param nimg: the node image object
2287

2288
    """
2289
    node = ninfo.name
2290
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2291

    
2292
    remote_os = nresult.get(constants.NV_OSLIST, None)
2293
    test = (not isinstance(remote_os, list) or
2294
            not compat.all(isinstance(v, list) and len(v) == 7
2295
                           for v in remote_os))
2296

    
2297
    _ErrorIf(test, constants.CV_ENODEOS, node,
2298
             "node hasn't returned valid OS data")
2299

    
2300
    nimg.os_fail = test
2301

    
2302
    if test:
2303
      return
2304

    
2305
    os_dict = {}
2306

    
2307
    for (name, os_path, status, diagnose,
2308
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2309

    
2310
      if name not in os_dict:
2311
        os_dict[name] = []
2312

    
2313
      # parameters is a list of lists instead of list of tuples due to
2314
      # JSON lacking a real tuple type, fix it:
2315
      parameters = [tuple(v) for v in parameters]
2316
      os_dict[name].append((os_path, status, diagnose,
2317
                            set(variants), set(parameters), set(api_ver)))
2318

    
2319
    nimg.oslist = os_dict
2320

    
2321
  def _VerifyNodeOS(self, ninfo, nimg, base):
2322
    """Verifies the node OS list.
2323

2324
    @type ninfo: L{objects.Node}
2325
    @param ninfo: the node to check
2326
    @param nimg: the node image object
2327
    @param base: the 'template' node we match against (e.g. from the master)
2328

2329
    """
2330
    node = ninfo.name
2331
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2332

    
2333
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2334

    
2335
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2336
    for os_name, os_data in nimg.oslist.items():
2337
      assert os_data, "Empty OS status for OS %s?!" % os_name
2338
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2339
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2340
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2341
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2342
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2343
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2344
      # comparisons with the 'base' image
2345
      test = os_name not in base.oslist
2346
      _ErrorIf(test, constants.CV_ENODEOS, node,
2347
               "Extra OS %s not present on reference node (%s)",
2348
               os_name, base.name)
2349
      if test:
2350
        continue
2351
      assert base.oslist[os_name], "Base node has empty OS status?"
2352
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2353
      if not b_status:
2354
        # base OS is invalid, skipping
2355
        continue
2356
      for kind, a, b in [("API version", f_api, b_api),
2357
                         ("variants list", f_var, b_var),
2358
                         ("parameters", beautify_params(f_param),
2359
                          beautify_params(b_param))]:
2360
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
2361
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2362
                 kind, os_name, base.name,
2363
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2364

    
2365
    # check any missing OSes
2366
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2367
    _ErrorIf(missing, constants.CV_ENODEOS, node,
2368
             "OSes present on reference node %s but missing on this node: %s",
2369
             base.name, utils.CommaJoin(missing))
2370

    
2371
  def _VerifyOob(self, ninfo, nresult):
2372
    """Verifies out of band functionality of a node.
2373

2374
    @type ninfo: L{objects.Node}
2375
    @param ninfo: the node to check
2376
    @param nresult: the remote results for the node
2377

2378
    """
2379
    node = ninfo.name
2380
    # We just have to verify the paths on master and/or master candidates
2381
    # as the oob helper is invoked on the master
2382
    if ((ninfo.master_candidate or ninfo.master_capable) and
2383
        constants.NV_OOB_PATHS in nresult):
2384
      for path_result in nresult[constants.NV_OOB_PATHS]:
2385
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2386

    
2387
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2388
    """Verifies and updates the node volume data.
2389

2390
    This function will update a L{NodeImage}'s internal structures
2391
    with data from the remote call.
2392

2393
    @type ninfo: L{objects.Node}
2394
    @param ninfo: the node to check
2395
    @param nresult: the remote results for the node
2396
    @param nimg: the node image object
2397
    @param vg_name: the configured VG name
2398

2399
    """
2400
    node = ninfo.name
2401
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2402

    
2403
    nimg.lvm_fail = True
2404
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2405
    if vg_name is None:
2406
      pass
2407
    elif isinstance(lvdata, basestring):
2408
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2409
               utils.SafeEncode(lvdata))
2410
    elif not isinstance(lvdata, dict):
2411
      _ErrorIf(True, constants.CV_ENODELVM, node,
2412
               "rpc call to node failed (lvlist)")
2413
    else:
2414
      nimg.volumes = lvdata
2415
      nimg.lvm_fail = False
2416

    
2417
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2418
    """Verifies and updates the node instance list.
2419

2420
    If the listing was successful, then updates this node's instance
2421
    list. Otherwise, it marks the RPC call as failed for the instance
2422
    list key.
2423

2424
    @type ninfo: L{objects.Node}
2425
    @param ninfo: the node to check
2426
    @param nresult: the remote results for the node
2427
    @param nimg: the node image object
2428

2429
    """
2430
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2431
    test = not isinstance(idata, list)
2432
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2433
                  "rpc call to node failed (instancelist): %s",
2434
                  utils.SafeEncode(str(idata)))
2435
    if test:
2436
      nimg.hyp_fail = True
2437
    else:
2438
      nimg.instances = idata
2439

    
2440
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2441
    """Verifies and computes a node information map
2442

2443
    @type ninfo: L{objects.Node}
2444
    @param ninfo: the node to check
2445
    @param nresult: the remote results for the node
2446
    @param nimg: the node image object
2447
    @param vg_name: the configured VG name
2448

2449
    """
2450
    node = ninfo.name
2451
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2452

    
2453
    # try to read free memory (from the hypervisor)
2454
    hv_info = nresult.get(constants.NV_HVINFO, None)
2455
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2456
    _ErrorIf(test, constants.CV_ENODEHV, node,
2457
             "rpc call to node failed (hvinfo)")
2458
    if not test:
2459
      try:
2460
        nimg.mfree = int(hv_info["memory_free"])
2461
      except (ValueError, TypeError):
2462
        _ErrorIf(True, constants.CV_ENODERPC, node,
2463
                 "node returned invalid nodeinfo, check hypervisor")
2464

    
2465
    # FIXME: devise a free space model for file based instances as well
2466
    if vg_name is not None:
2467
      test = (constants.NV_VGLIST not in nresult or
2468
              vg_name not in nresult[constants.NV_VGLIST])
2469
      _ErrorIf(test, constants.CV_ENODELVM, node,
2470
               "node didn't return data for the volume group '%s'"
2471
               " - it is either missing or broken", vg_name)
2472
      if not test:
2473
        try:
2474
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2475
        except (ValueError, TypeError):
2476
          _ErrorIf(True, constants.CV_ENODERPC, node,
2477
                   "node returned invalid LVM info, check LVM status")
2478

    
2479
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2480
    """Gets per-disk status information for all instances.
2481

2482
    @type nodelist: list of strings
2483
    @param nodelist: Node names
2484
    @type node_image: dict of (name, L{objects.Node})
2485
    @param node_image: Node objects
2486
    @type instanceinfo: dict of (name, L{objects.Instance})
2487
    @param instanceinfo: Instance objects
2488
    @rtype: {instance: {node: [(success, payload)]}}
2489
    @return: a dictionary of per-instance dictionaries with nodes as
2490
        keys and disk information as values; the disk information is a
2491
        list of tuples (success, payload)
2492

2493
    """
2494
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2495

    
2496
    node_disks = {}
2497
    node_disks_devonly = {}
2498
    diskless_instances = set()
2499
    diskless = constants.DT_DISKLESS
2500

    
2501
    for nname in nodelist:
2502
      node_instances = list(itertools.chain(node_image[nname].pinst,
2503
                                            node_image[nname].sinst))
2504
      diskless_instances.update(inst for inst in node_instances
2505
                                if instanceinfo[inst].disk_template == diskless)
2506
      disks = [(inst, disk)
2507
               for inst in node_instances
2508
               for disk in instanceinfo[inst].disks]
2509

    
2510
      if not disks:
2511
        # No need to collect data
2512
        continue
2513

    
2514
      node_disks[nname] = disks
2515

    
2516
      # Creating copies as SetDiskID below will modify the objects and that can
2517
      # lead to incorrect data returned from nodes
2518
      devonly = [dev.Copy() for (_, dev) in disks]
2519

    
2520
      for dev in devonly:
2521
        self.cfg.SetDiskID(dev, nname)
2522

    
2523
      node_disks_devonly[nname] = devonly
2524

    
2525
    assert len(node_disks) == len(node_disks_devonly)
2526

    
2527
    # Collect data from all nodes with disks
2528
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2529
                                                          node_disks_devonly)
2530

    
2531
    assert len(result) == len(node_disks)
2532

    
2533
    instdisk = {}
2534

    
2535
    for (nname, nres) in result.items():
2536
      disks = node_disks[nname]
2537

    
2538
      if nres.offline:
2539
        # No data from this node
2540
        data = len(disks) * [(False, "node offline")]
2541
      else:
2542
        msg = nres.fail_msg
2543
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
2544
                 "while getting disk information: %s", msg)
2545
        if msg:
2546
          # No data from this node
2547
          data = len(disks) * [(False, msg)]
2548
        else:
2549
          data = []
2550
          for idx, i in enumerate(nres.payload):
2551
            if isinstance(i, (tuple, list)) and len(i) == 2:
2552
              data.append(i)
2553
            else:
2554
              logging.warning("Invalid result from node %s, entry %d: %s",
2555
                              nname, idx, i)
2556
              data.append((False, "Invalid result from the remote node"))
2557

    
2558
      for ((inst, _), status) in zip(disks, data):
2559
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2560

    
2561
    # Add empty entries for diskless instances.
2562
    for inst in diskless_instances:
2563
      assert inst not in instdisk
2564
      instdisk[inst] = {}
2565

    
2566
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2567
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2568
                      compat.all(isinstance(s, (tuple, list)) and
2569
                                 len(s) == 2 for s in statuses)
2570
                      for inst, nnames in instdisk.items()
2571
                      for nname, statuses in nnames.items())
2572
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
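    # Hypothetical example of the structure returned below:
    #   {"inst1": {"node1": [(True, st0), (True, st1)]},  # two disks
    #    "inst2": {}}                                      # diskless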
2573

    
2574
    return instdisk
2575

    
2576
  @staticmethod
2577
  def _SshNodeSelector(group_uuid, all_nodes):
2578
    """Create endless iterators for all potential SSH check hosts.
2579

2580
    """
2581
    nodes = [node for node in all_nodes
2582
             if (node.group != group_uuid and
2583
                 not node.offline)]
2584
    keyfunc = operator.attrgetter("group")
2585

    
2586
    return map(itertools.cycle,
2587
               [sorted(map(operator.attrgetter("name"), names))
2588
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2589
                                                  keyfunc)])
2590

    
2591
  @classmethod
2592
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2593
    """Choose which nodes should talk to which other nodes.
2594

2595
    We will make nodes contact all nodes in their group, and one node from
2596
    every other group.
2597

2598
    @warning: This algorithm has a known issue if one node group is much
2599
      smaller than others (e.g. just one node). In such a case all other
2600
      nodes will talk to the single node.
2601

2602
    """
2603
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2604
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
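    # Sketch with hypothetical groups: when verifying G1 = {node1, node2} in
    # a cluster that also contains G2 and G3, the dict built below assigns
    # every online node of G1 one peer from G2 and one from G3, cycling
    # through each group's members so consecutive nodes get different peers.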
2605

    
2606
    return (online_nodes,
2607
            dict((name, sorted([i.next() for i in sel]))
2608
                 for name in online_nodes))
2609

    
2610
  def BuildHooksEnv(self):
2611
    """Build hooks env.
2612

2613
    Cluster-Verify hooks are run only in the post phase; if they fail, their
2614
    output is logged in the verify output and the verification fails.
2615

2616
    """
2617
    env = {
2618
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2619
      }
2620

    
2621
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2622
               for node in self.my_node_info.values())
2623

    
2624
    return env
2625

    
2626
  def BuildHooksNodes(self):
2627
    """Build hooks nodes.
2628

2629
    """
2630
    return ([], self.my_node_names)
2631

    
2632
  def Exec(self, feedback_fn):
2633
    """Verify integrity of the node group, performing various test on nodes.
2634

2635
    """
2636
    # This method has too many local variables. pylint: disable=R0914
2637
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2638

    
2639
    if not self.my_node_names:
2640
      # empty node group
2641
      feedback_fn("* Empty node group, skipping verification")
2642
      return True
2643

    
2644
    self.bad = False
2645
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2646
    verbose = self.op.verbose
2647
    self._feedback_fn = feedback_fn
2648

    
2649
    vg_name = self.cfg.GetVGName()
2650
    drbd_helper = self.cfg.GetDRBDHelper()
2651
    cluster = self.cfg.GetClusterInfo()
2652
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2653
    hypervisors = cluster.enabled_hypervisors
2654
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2655

    
2656
    i_non_redundant = [] # Non redundant instances
2657
    i_non_a_balanced = [] # Non auto-balanced instances
2658
    n_offline = 0 # Count of offline nodes
2659
    n_drained = 0 # Count of nodes being drained
2660
    node_vol_should = {}
2661

    
2662
    # FIXME: verify OS list
2663

    
2664
    # File verification
2665
    filemap = _ComputeAncillaryFiles(cluster, False)
2666

    
2667
    # do local checksums
2668
    master_node = self.master_node = self.cfg.GetMasterNode()
2669
    master_ip = self.cfg.GetMasterIP()
2670

    
2671
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2672

    
2673
    user_scripts = []
2674
    if self.cfg.GetUseExternalMipScript():
2675
      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2676

    
2677
    node_verify_param = {
2678
      constants.NV_FILELIST:
2679
        utils.UniqueSequence(filename
2680
                             for files in filemap
2681
                             for filename in files),
2682
      constants.NV_NODELIST:
2683
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2684
                                  self.all_node_info.values()),
2685
      constants.NV_HYPERVISOR: hypervisors,
2686
      constants.NV_HVPARAMS:
2687
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2688
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2689
                                 for node in node_data_list
2690
                                 if not node.offline],
2691
      constants.NV_INSTANCELIST: hypervisors,
2692
      constants.NV_VERSION: None,
2693
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2694
      constants.NV_NODESETUP: None,
2695
      constants.NV_TIME: None,
2696
      constants.NV_MASTERIP: (master_node, master_ip),
2697
      constants.NV_OSLIST: None,
2698
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2699
      constants.NV_USERSCRIPTS: user_scripts,
2700
      }
2701

    
2702
    if vg_name is not None:
2703
      node_verify_param[constants.NV_VGLIST] = None
2704
      node_verify_param[constants.NV_LVLIST] = vg_name
2705
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2706
      node_verify_param[constants.NV_DRBDLIST] = None
2707

    
2708
    if drbd_helper:
2709
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2710

    
2711
    # bridge checks
2712
    # FIXME: this needs to be changed per node-group, not cluster-wide
2713
    bridges = set()
2714
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2715
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2716
      bridges.add(default_nicpp[constants.NIC_LINK])
2717
    for instance in self.my_inst_info.values():
2718
      for nic in instance.nics:
2719
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2720
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2721
          bridges.add(full_nic[constants.NIC_LINK])
2722

    
2723
    if bridges:
2724
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2725

    
2726
    # Build our expected cluster state
2727
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2728
                                                 name=node.name,
2729
                                                 vm_capable=node.vm_capable))
2730
                      for node in node_data_list)
2731

    
2732
    # Gather OOB paths
2733
    oob_paths = []
2734
    for node in self.all_node_info.values():
2735
      path = _SupportsOob(self.cfg, node)
2736
      if path and path not in oob_paths:
2737
        oob_paths.append(path)
2738

    
2739
    if oob_paths:
2740
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2741

    
2742
    for instance in self.my_inst_names:
2743
      inst_config = self.my_inst_info[instance]
2744

    
2745
      for nname in inst_config.all_nodes:
2746
        if nname not in node_image:
2747
          gnode = self.NodeImage(name=nname)
2748
          gnode.ghost = (nname not in self.all_node_info)
2749
          node_image[nname] = gnode
2750

    
2751
      inst_config.MapLVsByNode(node_vol_should)
2752

    
2753
      pnode = inst_config.primary_node
2754
      node_image[pnode].pinst.append(instance)
2755

    
2756
      for snode in inst_config.secondary_nodes:
2757
        nimg = node_image[snode]
2758
        nimg.sinst.append(instance)
2759
        if pnode not in nimg.sbp:
2760
          nimg.sbp[pnode] = []
2761
        nimg.sbp[pnode].append(instance)
2762

    
2763
    # At this point, we have the in-memory data structures complete,
2764
    # except for the runtime information, which we'll gather next
2765

    
2766
    # Due to the way our RPC system works, exact response times cannot be
2767
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2768
    # time before and after executing the request, we can at least have a time
2769
    # window.
2770
    nvinfo_starttime = time.time()
2771
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2772
                                           node_verify_param,
2773
                                           self.cfg.GetClusterName())
2774
    nvinfo_endtime = time.time()
2775

    
2776
    if self.extra_lv_nodes and vg_name is not None:
2777
      extra_lv_nvinfo = \
2778
          self.rpc.call_node_verify(self.extra_lv_nodes,
2779
                                    {constants.NV_LVLIST: vg_name},
2780
                                    self.cfg.GetClusterName())
2781
    else:
2782
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeUserScripts(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_up and pnode_img.offline,
               constants.CV_EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               constants.CV_EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1,
                      constants.CV_EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
                 snode, "instance %s, connection to secondary node failed",
                 instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyzes the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
                                        for inst in self.instances.values()
                                        if inst.admin_up])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

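        # The payload maps each LV name to a tuple whose last field tells
        # whether the LV is online; LVs that are reported get popped from
        # nv_dict, so whatever is left over afterwards is missing, and an
        # offline LV marks its owning instance as needing activate-disks.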
        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
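        # The reported size is in bytes; shifting by 20 converts it to MiB,
        # the unit in which disk sizes are recorded in the configuration.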
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
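    # Bring the master IP back up even if the configuration update or the
    # known_hosts distribution above failed.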
    finally:
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                                (netmask))

    
3413

    
3414
class LUClusterSetParams(LogicalUnit):
3415
  """Change the parameters of the cluster.
3416

3417
  """
3418
  HPATH = "cluster-modify"
3419
  HTYPE = constants.HTYPE_CLUSTER
3420
  REQ_BGL = False
3421

    
3422
  def CheckArguments(self):
3423
    """Check parameters
3424

3425
    """
3426
    if self.op.uid_pool:
3427
      uidpool.CheckUidPool(self.op.uid_pool)
3428

    
3429
    if self.op.add_uids:
3430
      uidpool.CheckUidPool(self.op.add_uids)
3431

    
3432
    if self.op.remove_uids:
3433
      uidpool.CheckUidPool(self.op.remove_uids)
3434

    
3435
    if self.op.master_netmask is not None:
3436
      _ValidateNetmask(self.cfg, self.op.master_netmask)
3437

    
3438
  def ExpandNames(self):
3439
    # FIXME: in the future maybe other cluster params won't require checking on
3440
    # all nodes to be modified.
3441
    self.needed_locks = {
3442
      locking.LEVEL_NODE: locking.ALL_SET,
3443
    }
3444
    self.share_locks[locking.LEVEL_NODE] = 1
3445

    
3446
  def BuildHooksEnv(self):
3447
    """Build hooks env.
3448

3449
    """
3450
    return {
3451
      "OP_TARGET": self.cfg.GetClusterName(),
3452
      "NEW_VG_NAME": self.op.vg_name,
3453
      }
3454

    
3455
  def BuildHooksNodes(self):
3456
    """Build hooks nodes.
3457

3458
    """
3459
    mn = self.cfg.GetMasterNode()
3460
    return ([mn], [mn])
3461

    
3462
  def CheckPrereq(self):
3463
    """Check prerequisites.
3464

3465
    This checks whether the given params don't conflict and
3466
    if the given volume group is valid.
3467

3468
    """
3469
    if self.op.vg_name is not None and not self.op.vg_name:
3470
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3471
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3472
                                   " instances exist", errors.ECODE_INVAL)
3473

    
3474
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3475
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3476
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3477
                                   " drbd-based instances exist",
3478
                                   errors.ECODE_INVAL)
3479

    
3480
    node_list = self.owned_locks(locking.LEVEL_NODE)
3481

    
3482
    # if vg_name not None, checks given volume group on all nodes
3483
    if self.op.vg_name:
3484
      vglist = self.rpc.call_vg_list(node_list)
3485
      for node in node_list:
3486
        msg = vglist[node].fail_msg
3487
        if msg:
3488
          # ignoring down node
3489
          self.LogWarning("Error while gathering data on node %s"
3490
                          " (ignoring node): %s", node, msg)
3491
          continue
3492
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3493
                                              self.op.vg_name,
3494
                                              constants.MIN_VG_SIZE)
3495
        if vgstatus:
3496
          raise errors.OpPrereqError("Error on node '%s': %s" %
3497
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3498

    
3499
    if self.op.drbd_helper:
3500
      # checks given drbd helper on all nodes
3501
      helpers = self.rpc.call_drbd_helper(node_list)
3502
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3503
        if ninfo.offline:
3504
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3505
          continue
3506
        msg = helpers[node].fail_msg
3507
        if msg:
3508
          raise errors.OpPrereqError("Error checking drbd helper on node"
3509
                                     " '%s': %s" % (node, msg),
3510
                                     errors.ECODE_ENVIRON)
3511
        node_helper = helpers[node].payload
3512
        if node_helper != self.op.drbd_helper:
3513
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3514
                                     (node, node_helper), errors.ECODE_ENVIRON)
3515

    
3516
    self.cluster = cluster = self.cfg.GetClusterInfo()
3517
    # validate params changes
3518
    if self.op.beparams:
3519
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3520
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3521

    
3522
    if self.op.ndparams:
3523
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3524
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3525

    
3526
      # TODO: we need a more general way to handle resetting
3527
      # cluster-level parameters to default values
3528
      if self.new_ndparams["oob_program"] == "":
3529
        self.new_ndparams["oob_program"] = \
3530
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3531

    
3532
    if self.op.nicparams:
3533
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3534
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3535
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3536
      nic_errors = []
3537

    
3538
      # check all instances for consistency
3539
      for instance in self.cfg.GetAllInstancesInfo().values():
3540
        for nic_idx, nic in enumerate(instance.nics):
3541
          params_copy = copy.deepcopy(nic.nicparams)
3542
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3543

    
3544
          # check parameter syntax
3545
          try:
3546
            objects.NIC.CheckParameterSyntax(params_filled)
3547
          except errors.ConfigurationError, err:
3548
            nic_errors.append("Instance %s, nic/%d: %s" %
3549
                              (instance.name, nic_idx, err))
3550

    
3551
          # if we're moving instances to routed, check that they have an ip
3552
          target_mode = params_filled[constants.NIC_MODE]
3553
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3554
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3555
                              " address" % (instance.name, nic_idx))
3556
      if nic_errors:
3557
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3558
                                   "\n".join(nic_errors))
3559

    
3560
    # hypervisor list/parameters
3561
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3562
    if self.op.hvparams:
3563
      for hv_name, hv_dict in self.op.hvparams.items():
3564
        if hv_name not in self.new_hvparams:
3565
          self.new_hvparams[hv_name] = hv_dict
3566
        else:
3567
          self.new_hvparams[hv_name].update(hv_dict)
3568

    
3569
    # os hypervisor parameters
3570
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3571
    if self.op.os_hvp:
3572
      for os_name, hvs in self.op.os_hvp.items():
3573
        if os_name not in self.new_os_hvp:
3574
          self.new_os_hvp[os_name] = hvs
3575
        else:
3576
          for hv_name, hv_dict in hvs.items():
3577
            if hv_name not in self.new_os_hvp[os_name]:
3578
              self.new_os_hvp[os_name][hv_name] = hv_dict
3579
            else:
3580
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3581

    
3582
    # os parameters
3583
    self.new_osp = objects.FillDict(cluster.osparams, {})
3584
    if self.op.osparams:
3585
      for os_name, osp in self.op.osparams.items():
3586
        if os_name not in self.new_osp:
3587
          self.new_osp[os_name] = {}
3588

    
3589
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3590
                                                  use_none=True)
3591

    
3592
        if not self.new_osp[os_name]:
3593
          # we removed all parameters
3594
          del self.new_osp[os_name]
3595
        else:
3596
          # check the parameter validity (remote check)
3597
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3598
                         os_name, self.new_osp[os_name])
3599

    
3600
    # changes to the hypervisor list
3601
    if self.op.enabled_hypervisors is not None:
3602
      self.hv_list = self.op.enabled_hypervisors
3603
      for hv in self.hv_list:
3604
        # if the hypervisor doesn't already exist in the cluster
3605
        # hvparams, we initialize it to empty, and then (in both
3606
        # cases) we make sure to fill the defaults, as we might not
3607
        # have a complete defaults list if the hypervisor wasn't
3608
        # enabled before
3609
        if hv not in new_hvp:
3610
          new_hvp[hv] = {}
3611
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3612
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3613
    else:
3614
      self.hv_list = cluster.enabled_hypervisors
3615

    
3616
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3617
      # either the enabled list has changed, or the parameters have, validate
3618
      for hv_name, hv_params in self.new_hvparams.items():
3619
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3620
            (self.op.enabled_hypervisors and
3621
             hv_name in self.op.enabled_hypervisors)):
3622
          # either this is a new hypervisor, or its parameters have changed
3623
          hv_class = hypervisor.GetHypervisor(hv_name)
3624
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3625
          hv_class.CheckParameterSyntax(hv_params)
3626
          _CheckHVParams(self, node_list, hv_name, hv_params)
3627

    
3628
    if self.op.os_hvp:
3629
      # no need to check any newly-enabled hypervisors, since the
3630
      # defaults have already been checked in the above code-block
3631
      for os_name, os_hvp in self.new_os_hvp.items():
3632
        for hv_name, hv_params in os_hvp.items():
3633
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3634
          # we need to fill in the new os_hvp on top of the actual hv_p
3635
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3636
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3637
          hv_class = hypervisor.GetHypervisor(hv_name)
3638
          hv_class.CheckParameterSyntax(new_osp)
3639
          _CheckHVParams(self, node_list, hv_name, new_osp)
3640

    
3641
    if self.op.default_iallocator:
3642
      alloc_script = utils.FindFile(self.op.default_iallocator,
3643
                                    constants.IALLOCATOR_SEARCH_PATH,
3644
                                    os.path.isfile)
3645
      if alloc_script is None:
3646
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3647
                                   " specified" % self.op.default_iallocator,
3648
                                   errors.ECODE_INVAL)
3649

    
3650
  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)

      self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
3802
  # Compute files for all nodes
3803
  files_all = set([
3804
    constants.SSH_KNOWN_HOSTS_FILE,
3805
    constants.CONFD_HMAC_KEY,
3806
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3807
    constants.SPICE_CERT_FILE,
3808
    constants.SPICE_CACERT_FILE,
3809
    constants.RAPI_USERS_FILE,
3810
    ])
3811

    
3812
  if not redist:
3813
    files_all.update(constants.ALL_CERT_FILES)
3814
    files_all.update(ssconf.SimpleStore().GetFileList())
3815
  else:
3816
    # we need to ship at least the RAPI certificate
3817
    files_all.add(constants.RAPI_CERT_FILE)
3818

    
3819
  if cluster.modify_etc_hosts:
3820
    files_all.add(constants.ETC_HOSTS)
3821

    
3822
  # Files which are optional, these must:
3823
  # - be present in one other category as well
3824
  # - either exist or not exist on all nodes of that category (mc, vm all)
3825
  files_opt = set([
3826
    constants.RAPI_USERS_FILE,
3827
    ])
3828

    
3829
  # Files which should only be on master candidates
3830
  files_mc = set()
3831

    
3832
  if not redist:
3833
    files_mc.add(constants.CLUSTER_CONF_FILE)
3834

    
3835
    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3836
    # replication
3837
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
3838

    
3839
  # Files which should only be on VM-capable nodes
3840
  files_vm = set(filename
3841
    for hv_name in cluster.enabled_hypervisors
3842
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3843

    
3844
  files_opt |= set(filename
3845
    for hv_name in cluster.enabled_hypervisors
3846
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3847

    
3848
  # Filenames in each category must be unique
3849
  all_files_set = files_all | files_mc | files_vm
3850
  assert (len(all_files_set) ==
3851
          sum(map(len, [files_all, files_mc, files_vm]))), \
3852
         "Found file listed in more than one file list"
3853

    
3854
  # Optional files must be present in one other category
3855
  assert all_files_set.issuperset(files_opt), \
3856
         "Optional file not in a different required list"
3857

    
3858
  return (files_all, files_opt, files_mc, files_vm)
3859

    
3860

    
3861
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3862
  """Distribute additional files which are part of the cluster configuration.
3863

3864
  ConfigWriter takes care of distributing the config and ssconf files, but
3865
  there are more files which should be distributed to all nodes. This function
3866
  makes sure those are copied.
3867

3868
  @param lu: calling logical unit
3869
  @param additional_nodes: list of nodes not in the config to distribute to
3870
  @type additional_vm: boolean
3871
  @param additional_vm: whether the additional nodes are vm-capable or not
3872

3873
  """
3874
  # Gather target nodes
3875
  cluster = lu.cfg.GetClusterInfo()
3876
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3877

    
3878
  online_nodes = lu.cfg.GetOnlineNodeList()
3879
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3880

    
3881
  if additional_nodes is not None:
3882
    online_nodes.extend(additional_nodes)
3883
    if additional_vm:
3884
      vm_nodes.extend(additional_nodes)
3885

    
3886
  # Never distribute to master node
3887
  for nodelist in [online_nodes, vm_nodes]:
3888
    if master_info.name in nodelist:
3889
      nodelist.remove(master_info.name)
3890

    
3891
  # Gather file lists
3892
  (files_all, _, files_mc, files_vm) = \
3893
    _ComputeAncillaryFiles(cluster, True)
3894

    
3895
  # Never re-distribute configuration file from here
3896
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3897
              constants.CLUSTER_CONF_FILE in files_vm)
3898
  assert not files_mc, "Master candidates not handled in this function"
3899

    
3900
  filemap = [
3901
    (online_nodes, files_all),
3902
    (vm_nodes, files_vm),
3903
    ]
3904

    
3905
  # Upload the files
3906
  for (node_list, files) in filemap:
3907
    for fname in files:
3908
      _UploadHelper(lu, node_list, fname)
3909

    
3910

    
3911
class LUClusterRedistConf(NoHooksLU):
3912
  """Force the redistribution of cluster configuration.
3913

3914
  This is a very simple LU.
3915

3916
  """
3917
  REQ_BGL = False
3918

    
3919
  def ExpandNames(self):
3920
    self.needed_locks = {
3921
      locking.LEVEL_NODE: locking.ALL_SET,
3922
    }
3923
    self.share_locks[locking.LEVEL_NODE] = 1
3924

    
3925
  def Exec(self, feedback_fn):
3926
    """Redistribute the configuration.
3927

3928
    """
3929
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3930
    _RedistributeAncillaryFiles(self)
3931

    
3932

    
3933
class LUClusterActivateMasterIp(NoHooksLU):
3934
  """Activate the master IP on the master node.
3935

3936
  """
3937
  def Exec(self, feedback_fn):
3938
    """Activate the master IP.
3939

3940
    """
3941
    master_params = self.cfg.GetMasterNetworkParameters()
3942
    ems = self.cfg.GetUseExternalMipScript()
3943
    self.rpc.call_node_activate_master_ip(master_params.name,
3944
                                          master_params, ems)
3945

    
3946

    
3947
class LUClusterDeactivateMasterIp(NoHooksLU):
3948
  """Deactivate the master IP on the master node.
3949

3950
  """
3951
  def Exec(self, feedback_fn):
3952
    """Deactivate the master IP.
3953

3954
    """
3955
    master_params = self.cfg.GetMasterNetworkParameters()
3956
    ems = self.cfg.GetUseExternalMipScript()
3957
    self.rpc.call_node_deactivate_master_ip(master_params.name, master_params,
3958
                                            ems)
3959

    
3960

    
3961
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

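  # Recurse into the child devices (e.g. the LVs backing a DRBD disk) with the
  # same on_primary setting; ldisk intentionally keeps its default of False.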
  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

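          # When powering on several nodes, stagger consecutive operations by
          # the requested delay so the nodes do not all start at once.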
          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
4298
      for (name, path, status, diagnose, variants,
4299
           params, api_versions) in nr.payload:
4300
        if name not in all_os:
4301
          # build a list of nodes for this os containing empty lists
4302
          # for each node in node_list
4303
          all_os[name] = {}
4304
          for nname in good_nodes:
4305
            all_os[name][nname] = []
4306
        # convert params from [name, help] to (name, help)
4307
        params = [tuple(v) for v in params]
4308
        all_os[name][node_name].append((path, status, diagnose,
4309
                                        variants, params, api_versions))
4310
    return all_os
4311

    
4312
  def _GetQueryData(self, lu):
4313
    """Computes the list of nodes and their attributes.
4314

4315
    """
4316
    # Locking is not used
4317
    assert not (compat.any(lu.glm.is_owned(level)
4318
                           for level in locking.LEVELS
4319
                           if level != locking.LEVEL_CLUSTER) or
4320
                self.do_locking or self.use_locking)
4321

    
4322
    valid_nodes = [node.name
4323
                   for node in lu.cfg.GetAllNodesInfo().values()
4324
                   if not node.offline and node.vm_capable]
4325
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4326
    cluster = lu.cfg.GetClusterInfo()
4327

    
4328
    data = {}
4329

    
4330
    for (os_name, os_data) in pol.items():
4331
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4332
                          hidden=(os_name in cluster.hidden_os),
4333
                          blacklisted=(os_name in cluster.blacklisted_os))
4334

    
4335
      variants = set()
4336
      parameters = set()
4337
      api_versions = set()
4338

    
4339
      for idx, osl in enumerate(os_data.values()):
4340
        info.valid = bool(info.valid and osl and osl[0][1])
4341
        if not info.valid:
4342
          break
4343

    
4344
        (node_variants, node_params, node_api) = osl[0][3:6]
4345
        if idx == 0:
4346
          # First entry
4347
          variants.update(node_variants)
4348
          parameters.update(node_params)
4349
          api_versions.update(node_api)
4350
        else:
4351
          # Filter out inconsistent values
4352
          variants.intersection_update(node_variants)
4353
          parameters.intersection_update(node_params)
4354
          api_versions.intersection_update(node_api)
4355

    
4356
      info.variants = list(variants)
4357
      info.parameters = list(parameters)
4358
      info.api_versions = list(api_versions)
4359

    
4360
      data[os_name] = info
4361

    
4362
    # Prepare data in requested order
4363
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4364
            if name in data]
4365

    
4366

    
4367
class LUOsDiagnose(NoHooksLU):
4368
  """Logical unit for OS diagnose/query.
4369

4370
  """
4371
  REQ_BGL = False
4372

    
4373
  @staticmethod
4374
  def _BuildFilter(fields, names):
4375
    """Builds a filter for querying OSes.
4376

4377
    """
4378
    name_filter = qlang.MakeSimpleFilter("name", names)
4379

    
4380
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4381
    # respective field is not requested
4382
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4383
                     for fname in ["hidden", "blacklisted"]
4384
                     if fname not in fields]
4385
    if "valid" not in fields:
4386
      status_filter.append([qlang.OP_TRUE, "valid"])
4387

    
4388
    if status_filter:
4389
      status_filter.insert(0, qlang.OP_AND)
4390
    else:
4391
      status_filter = None
4392

    
4393
    if name_filter and status_filter:
4394
      return [qlang.OP_AND, name_filter, status_filter]
4395
    elif name_filter:
4396
      return name_filter
4397
    else:
4398
      return status_filter
4399

    
4400
  def CheckArguments(self):
4401
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4402
                       self.op.output_fields, False)
4403

    
4404
  def ExpandNames(self):
4405
    self.oq.ExpandNames(self)
4406

    
4407
  def Exec(self, feedback_fn):
4408
    return self.oq.OldStyleQuery(self)
4409

    
4410

    
4411
class LUNodeRemove(LogicalUnit):
4412
  """Logical unit for removing a node.
4413

4414
  """
4415
  HPATH = "node-remove"
4416
  HTYPE = constants.HTYPE_NODE
4417

    
4418
  def BuildHooksEnv(self):
4419
    """Build hooks env.
4420

4421
    This doesn't run on the target node in the pre phase as a failed
4422
    node would then be impossible to remove.
4423

4424
    """
4425
    return {
4426
      "OP_TARGET": self.op.node_name,
4427
      "NODE_NAME": self.op.node_name,
4428
      }
4429

    
4430
  def BuildHooksNodes(self):
4431
    """Build hooks nodes.
4432

4433
    """
4434
    all_nodes = self.cfg.GetNodeList()
4435
    try:
4436
      all_nodes.remove(self.op.node_name)
4437
    except ValueError:
4438
      logging.warning("Node '%s', which is about to be removed, was not found"
4439
                      " in the list of all nodes", self.op.node_name)
4440
    return (all_nodes, all_nodes)
4441

    
4442
  def CheckPrereq(self):
4443
    """Check prerequisites.
4444

4445
    This checks:
4446
     - the node exists in the configuration
4447
     - it does not have primary or secondary instances
4448
     - it's not the master
4449

4450
    Any errors are signaled by raising errors.OpPrereqError.
4451

4452
    """
4453
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4454
    node = self.cfg.GetNodeInfo(self.op.node_name)
4455
    assert node is not None
4456

    
4457
    masternode = self.cfg.GetMasterNode()
4458
    if node.name == masternode:
4459
      raise errors.OpPrereqError("Node is the master node, failover to another"
4460
                                 " node is required", errors.ECODE_INVAL)
4461

    
4462
    for instance_name, instance in self.cfg.GetAllInstancesInfo():
4463
      if node.name in instance.all_nodes:
4464
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4465
                                   " please remove first" % instance_name,
4466
                                   errors.ECODE_INVAL)
4467
    self.op.node_name = node.name
4468
    self.node = node
4469

    
4470
  def Exec(self, feedback_fn):
4471
    """Removes the node from the cluster.
4472

4473
    """
4474
    node = self.node
4475
    logging.info("Stopping the node daemon and removing configs from node %s",
4476
                 node.name)
4477

    
4478
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4479

    
4480
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4481
      "Not owning BGL"
4482

    
4483
    # Promote nodes to master candidate as needed
4484
    _AdjustCandidatePool(self, exceptions=[node.name])
4485
    self.context.RemoveNode(node.name)
4486

    
4487
    # Run post hooks on the node before it's removed
4488
    _RunPostHook(self, node.name)
4489

    
4490
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4491
    msg = result.fail_msg
4492
    if msg:
4493
      self.LogWarning("Errors encountered on the remote node while leaving"
4494
                      " the cluster: %s", msg)
4495

    
4496
    # Remove node from our /etc/hosts
4497
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4498
      master_node = self.cfg.GetMasterNode()
4499
      result = self.rpc.call_etc_hosts_modify(master_node,
4500
                                              constants.ETC_HOSTS_REMOVE,
4501
                                              node.name, None)
4502
      result.Raise("Can't update hosts file with new host data")
4503
      _RedistributeAncillaryFiles(self)
4504

    
4505

    
4506
class _NodeQuery(_QueryBase):
4507
  FIELDS = query.NODE_FIELDS
4508

    
4509
  def ExpandNames(self, lu):
4510
    lu.needed_locks = {}
4511
    lu.share_locks = _ShareAll()
4512

    
4513
    if self.names:
4514
      self.wanted = _GetWantedNodes(lu, self.names)
4515
    else:
4516
      self.wanted = locking.ALL_SET
4517

    
4518
    self.do_locking = (self.use_locking and
4519
                       query.NQ_LIVE in self.requested_data)
4520

    
4521
    if self.do_locking:
4522
      # If any non-static field is requested we need to lock the nodes
4523
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4524

    
4525
  def DeclareLocks(self, lu, level):
4526
    pass
4527

    
4528
  def _GetQueryData(self, lu):
4529
    """Computes the list of nodes and their attributes.
4530

4531
    """
4532
    all_info = lu.cfg.GetAllNodesInfo()
4533

    
4534
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4535

    
4536
    # Gather data as requested
4537
    if query.NQ_LIVE in self.requested_data:
4538
      # filter out non-vm_capable nodes
4539
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4540

    
4541
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4542
                                        lu.cfg.GetHypervisorType())
4543
      live_data = dict((name, nresult.payload)
4544
                       for (name, nresult) in node_data.items()
4545
                       if not nresult.fail_msg and nresult.payload)
4546
    else:
4547
      live_data = None
4548

    
4549
    if query.NQ_INST in self.requested_data:
4550
      node_to_primary = dict([(name, set()) for name in nodenames])
4551
      node_to_secondary = dict([(name, set()) for name in nodenames])
4552

    
4553
      inst_data = lu.cfg.GetAllInstancesInfo()
4554

    
4555
      for inst in inst_data.values():
4556
        if inst.primary_node in node_to_primary:
4557
          node_to_primary[inst.primary_node].add(inst.name)
4558
        for secnode in inst.secondary_nodes:
4559
          if secnode in node_to_secondary:
4560
            node_to_secondary[secnode].add(inst.name)
4561
    else:
4562
      node_to_primary = None
4563
      node_to_secondary = None
4564

    
4565
    if query.NQ_OOB in self.requested_data:
4566
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4567
                         for name, node in all_info.iteritems())
4568
    else:
4569
      oob_support = None
4570

    
4571
    if query.NQ_GROUP in self.requested_data:
4572
      groups = lu.cfg.GetAllNodeGroupsInfo()
4573
    else:
4574
      groups = {}
4575

    
4576
    return query.NodeQueryData([all_info[name] for name in nodenames],
4577
                               live_data, lu.cfg.GetMasterNode(),
4578
                               node_to_primary, node_to_secondary, groups,
4579
                               oob_support, lu.cfg.GetClusterInfo())
4580

    
4581

    
4582
class LUNodeQuery(NoHooksLU):
4583
  """Logical unit for querying nodes.
4584

4585
  """
4586
  # pylint: disable=W0142
4587
  REQ_BGL = False
4588

    
4589
  def CheckArguments(self):
4590
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4591
                         self.op.output_fields, self.op.use_locking)
4592

    
4593
  def ExpandNames(self):
4594
    self.nq.ExpandNames(self)
4595

    
4596
  def DeclareLocks(self, level):
4597
    self.nq.DeclareLocks(self, level)
4598

    
4599
  def Exec(self, feedback_fn):
4600
    return self.nq.OldStyleQuery(self)
4601

    
4602

    
4603
class LUNodeQueryvols(NoHooksLU):
4604
  """Logical unit for getting volumes on node(s).
4605

4606
  """
4607
  REQ_BGL = False
4608
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4609
  _FIELDS_STATIC = utils.FieldSet("node")
4610

    
4611
  def CheckArguments(self):
4612
    _CheckOutputFields(static=self._FIELDS_STATIC,
4613
                       dynamic=self._FIELDS_DYNAMIC,
4614
                       selected=self.op.output_fields)
4615

    
4616
  def ExpandNames(self):
4617
    self.share_locks = _ShareAll()
4618
    self.needed_locks = {}
4619

    
4620
    if not self.op.nodes:
4621
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4622
    else:
4623
      self.needed_locks[locking.LEVEL_NODE] = \
4624
        _GetWantedNodes(self, self.op.nodes)
4625

    
4626
  def Exec(self, feedback_fn):
4627
    """Computes the list of nodes and their attributes.
4628

4629
    """
4630
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4631
    volumes = self.rpc.call_node_volumes(nodenames)
4632

    
4633
    ilist = self.cfg.GetAllInstancesInfo()
4634
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4635

    
4636
    output = []
4637
    for node in nodenames:
4638
      nresult = volumes[node]
4639
      if nresult.offline:
4640
        continue
4641
      msg = nresult.fail_msg
4642
      if msg:
4643
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4644
        continue
4645

    
4646
      node_vols = sorted(nresult.payload,
4647
                         key=operator.itemgetter("dev"))
4648

    
4649
      for vol in node_vols:
4650
        node_output = []
4651
        for field in self.op.output_fields:
4652
          if field == "node":
4653
            val = node
4654
          elif field == "phys":
4655
            val = vol["dev"]
4656
          elif field == "vg":
4657
            val = vol["vg"]
4658
          elif field == "name":
4659
            val = vol["name"]
4660
          elif field == "size":
4661
            val = int(float(vol["size"]))
4662
          elif field == "instance":
4663
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4664
          else:
4665
            raise errors.ParameterError(field)
4666
          node_output.append(str(val))
4667

    
4668
        output.append(node_output)
4669

    
4670
    return output
4671

    
4672

    
4673
class LUNodeQueryStorage(NoHooksLU):
4674
  """Logical unit for getting information on storage units on node(s).
4675

4676
  """
4677
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4678
  REQ_BGL = False
4679

    
4680
  def CheckArguments(self):
4681
    _CheckOutputFields(static=self._FIELDS_STATIC,
4682
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4683
                       selected=self.op.output_fields)
4684

    
4685
  def ExpandNames(self):
4686
    self.share_locks = _ShareAll()
4687
    self.needed_locks = {}
4688

    
4689
    if self.op.nodes:
4690
      self.needed_locks[locking.LEVEL_NODE] = \
4691
        _GetWantedNodes(self, self.op.nodes)
4692
    else:
4693
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4694

    
4695
  def Exec(self, feedback_fn):
4696
    """Computes the list of nodes and their attributes.
4697

4698
    """
4699
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4700

    
4701
    # Always get name to sort by
4702
    if constants.SF_NAME in self.op.output_fields:
4703
      fields = self.op.output_fields[:]
4704
    else:
4705
      fields = [constants.SF_NAME] + self.op.output_fields
4706

    
4707
    # Never ask for node or type as it's only known to the LU
4708
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4709
      while extra in fields:
4710
        fields.remove(extra)
4711

    
4712
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4713
    name_idx = field_idx[constants.SF_NAME]
4714

    
4715
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4716
    data = self.rpc.call_storage_list(self.nodes,
4717
                                      self.op.storage_type, st_args,
4718
                                      self.op.name, fields)
4719

    
4720
    result = []
4721

    
4722
    for node in utils.NiceSort(self.nodes):
4723
      nresult = data[node]
4724
      if nresult.offline:
4725
        continue
4726

    
4727
      msg = nresult.fail_msg
4728
      if msg:
4729
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4730
        continue
4731

    
4732
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4733

    
4734
      for name in utils.NiceSort(rows.keys()):
4735
        row = rows[name]
4736

    
4737
        out = []
4738

    
4739
        for field in self.op.output_fields:
4740
          if field == constants.SF_NODE:
4741
            val = node
4742
          elif field == constants.SF_TYPE:
4743
            val = self.op.storage_type
4744
          elif field in field_idx:
4745
            val = row[field_idx[field]]
4746
          else:
4747
            raise errors.ParameterError(field)
4748

    
4749
          out.append(val)
4750

    
4751
        result.append(out)
4752

    
4753
    return result
4754

    
4755

    
4756
class _InstanceQuery(_QueryBase):
4757
  FIELDS = query.INSTANCE_FIELDS
4758

    
4759
  def ExpandNames(self, lu):
4760
    lu.needed_locks = {}
4761
    lu.share_locks = _ShareAll()
4762

    
4763
    if self.names:
4764
      self.wanted = _GetWantedInstances(lu, self.names)
4765
    else:
4766
      self.wanted = locking.ALL_SET
4767

    
4768
    self.do_locking = (self.use_locking and
4769
                       query.IQ_LIVE in self.requested_data)
4770
    if self.do_locking:
4771
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4772
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4773
      lu.needed_locks[locking.LEVEL_NODE] = []
4774
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4775

    
4776
    self.do_grouplocks = (self.do_locking and
4777
                          query.IQ_NODES in self.requested_data)
4778

    
4779
  def DeclareLocks(self, lu, level):
4780
    if self.do_locking:
4781
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4782
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4783

    
4784
        # Lock all groups used by instances optimistically; this requires going
4785
        # via the node before it's locked, requiring verification later on
4786
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4787
          set(group_uuid
4788
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4789
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4790
      elif level == locking.LEVEL_NODE:
4791
        lu._LockInstancesNodes() # pylint: disable=W0212
4792

    
4793
  @staticmethod
4794
  def _CheckGroupLocks(lu):
4795
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4796
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4797

    
4798
    # Check if node groups for locked instances are still correct
4799
    for instance_name in owned_instances:
4800
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4801

    
4802
  def _GetQueryData(self, lu):
4803
    """Computes the list of instances and their attributes.
4804

4805
    """
4806
    if self.do_grouplocks:
4807
      self._CheckGroupLocks(lu)
4808

    
4809
    cluster = lu.cfg.GetClusterInfo()
4810
    all_info = lu.cfg.GetAllInstancesInfo()
4811

    
4812
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4813

    
4814
    instance_list = [all_info[name] for name in instance_names]
4815
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4816
                                        for inst in instance_list)))
4817
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4818
    bad_nodes = []
4819
    offline_nodes = []
4820
    wrongnode_inst = set()
4821

    
4822
    # Gather data as requested
4823
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4824
      live_data = {}
4825
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4826
      for name in nodes:
4827
        result = node_data[name]
4828
        if result.offline:
4829
          # offline nodes will be in both lists
4830
          assert result.fail_msg
4831
          offline_nodes.append(name)
4832
        if result.fail_msg:
4833
          bad_nodes.append(name)
4834
        elif result.payload:
4835
          for inst in result.payload:
4836
            if inst in all_info:
4837
              if all_info[inst].primary_node == name:
4838
                live_data.update(result.payload)
4839
              else:
4840
                wrongnode_inst.add(inst)
4841
            else:
4842
              # orphan instance; we don't list it here as we don't
4843
              # handle this case yet in the output of instance listing
4844
              logging.warning("Orphan instance '%s' found on node %s",
4845
                              inst, name)
4846
        # else no instance is alive
4847
    else:
4848
      live_data = {}
4849

    
4850
    if query.IQ_DISKUSAGE in self.requested_data:
4851
      disk_usage = dict((inst.name,
4852
                         _ComputeDiskSize(inst.disk_template,
4853
                                          [{constants.IDISK_SIZE: disk.size}
4854
                                           for disk in inst.disks]))
4855
                        for inst in instance_list)
4856
    else:
4857
      disk_usage = None
4858

    
4859
    if query.IQ_CONSOLE in self.requested_data:
4860
      consinfo = {}
4861
      for inst in instance_list:
4862
        if inst.name in live_data:
4863
          # Instance is running
4864
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4865
        else:
4866
          consinfo[inst.name] = None
4867
      assert set(consinfo.keys()) == set(instance_names)
4868
    else:
4869
      consinfo = None
4870

    
4871
    if query.IQ_NODES in self.requested_data:
4872
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4873
                                            instance_list)))
4874
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4875
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4876
                    for uuid in set(map(operator.attrgetter("group"),
4877
                                        nodes.values())))
4878
    else:
4879
      nodes = None
4880
      groups = None
4881

    
4882
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4883
                                   disk_usage, offline_nodes, bad_nodes,
4884
                                   live_data, wrongnode_inst, consinfo,
4885
                                   nodes, groups)
4886

    
4887

    
4888
class LUQuery(NoHooksLU):
4889
  """Query for resources/items of a certain kind.
4890

4891
  """
4892
  # pylint: disable=W0142
4893
  REQ_BGL = False
4894

    
4895
  def CheckArguments(self):
4896
    qcls = _GetQueryImplementation(self.op.what)
4897

    
4898
    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4899

    
4900
  def ExpandNames(self):
4901
    self.impl.ExpandNames(self)
4902

    
4903
  def DeclareLocks(self, level):
4904
    self.impl.DeclareLocks(self, level)
4905

    
4906
  def Exec(self, feedback_fn):
4907
    return self.impl.NewStyleQuery(self)
4908

    
4909

    
4910
class LUQueryFields(NoHooksLU):
4911
  """Query for resources/items of a certain kind.
4912

4913
  """
4914
  # pylint: disable=W0142
4915
  REQ_BGL = False
4916

    
4917
  def CheckArguments(self):
4918
    self.qcls = _GetQueryImplementation(self.op.what)
4919

    
4920
  def ExpandNames(self):
4921
    self.needed_locks = {}
4922

    
4923
  def Exec(self, feedback_fn):
4924
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4925

    
4926

    
4927
class LUNodeModifyStorage(NoHooksLU):
4928
  """Logical unit for modifying a storage volume on a node.
4929

4930
  """
4931
  REQ_BGL = False
4932

    
4933
  def CheckArguments(self):
4934
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4935

    
4936
    storage_type = self.op.storage_type
4937

    
4938
    try:
4939
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4940
    except KeyError:
4941
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4942
                                 " modified" % storage_type,
4943
                                 errors.ECODE_INVAL)
4944

    
4945
    diff = set(self.op.changes.keys()) - modifiable
4946
    if diff:
4947
      raise errors.OpPrereqError("The following fields can not be modified for"
4948
                                 " storage units of type '%s': %r" %
4949
                                 (storage_type, list(diff)),
4950
                                 errors.ECODE_INVAL)
4951

    
4952
  def ExpandNames(self):
4953
    self.needed_locks = {
4954
      locking.LEVEL_NODE: self.op.node_name,
4955
      }
4956

    
4957
  def Exec(self, feedback_fn):
4958
    """Computes the list of nodes and their attributes.
4959

4960
    """
4961
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4962
    result = self.rpc.call_storage_modify(self.op.node_name,
4963
                                          self.op.storage_type, st_args,
4964
                                          self.op.name, self.op.changes)
4965
    result.Raise("Failed to modify storage unit '%s' on %s" %
4966
                 (self.op.name, self.op.node_name))
4967

    
4968

    
4969
class LUNodeAdd(LogicalUnit):
4970
  """Logical unit for adding node to the cluster.
4971

4972
  """
4973
  HPATH = "node-add"
4974
  HTYPE = constants.HTYPE_NODE
4975
  _NFLAGS = ["master_capable", "vm_capable"]
4976

    
4977
  def CheckArguments(self):
4978
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4979
    # validate/normalize the node name
4980
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4981
                                         family=self.primary_ip_family)
4982
    self.op.node_name = self.hostname.name
4983

    
4984
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4985
      raise errors.OpPrereqError("Cannot readd the master node",
4986
                                 errors.ECODE_STATE)
4987

    
4988
    if self.op.readd and self.op.group:
4989
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4990
                                 " being readded", errors.ECODE_INVAL)
4991

    
4992
  def BuildHooksEnv(self):
4993
    """Build hooks env.
4994

4995
    This will run on all nodes before, and on all nodes + the new node after.
4996

4997
    """
4998
    return {
4999
      "OP_TARGET": self.op.node_name,
5000
      "NODE_NAME": self.op.node_name,
5001
      "NODE_PIP": self.op.primary_ip,
5002
      "NODE_SIP": self.op.secondary_ip,
5003
      "MASTER_CAPABLE": str(self.op.master_capable),
5004
      "VM_CAPABLE": str(self.op.vm_capable),
5005
      }
5006

    
5007
  def BuildHooksNodes(self):
5008
    """Build hooks nodes.
5009

5010
    """
5011
    # Exclude added node
5012
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5013
    post_nodes = pre_nodes + [self.op.node_name, ]
5014

    
5015
    return (pre_nodes, post_nodes)
5016

    
5017
  def CheckPrereq(self):
5018
    """Check prerequisites.
5019

5020
    This checks:
5021
     - the new node is not already in the config
5022
     - it is resolvable
5023
     - its parameters (single/dual homed) matches the cluster
5024

5025
    Any errors are signaled by raising errors.OpPrereqError.
5026

5027
    """
5028
    cfg = self.cfg
5029
    hostname = self.hostname
5030
    node = hostname.name
5031
    primary_ip = self.op.primary_ip = hostname.ip
5032
    if self.op.secondary_ip is None:
5033
      if self.primary_ip_family == netutils.IP6Address.family:
5034
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5035
                                   " IPv4 address must be given as secondary",
5036
                                   errors.ECODE_INVAL)
5037
      self.op.secondary_ip = primary_ip
5038

    
5039
    secondary_ip = self.op.secondary_ip
5040
    if not netutils.IP4Address.IsValid(secondary_ip):
5041
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5042
                                 " address" % secondary_ip, errors.ECODE_INVAL)
5043

    
5044
    node_list = cfg.GetNodeList()
5045
    if not self.op.readd and node in node_list:
5046
      raise errors.OpPrereqError("Node %s is already in the configuration" %
5047
                                 node, errors.ECODE_EXISTS)
5048
    elif self.op.readd and node not in node_list:
5049
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5050
                                 errors.ECODE_NOENT)
5051

    
5052
    self.changed_primary_ip = False
5053

    
5054
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5055
      if self.op.readd and node == existing_node_name:
5056
        if existing_node.secondary_ip != secondary_ip:
5057
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
5058
                                     " address configuration as before",
5059
                                     errors.ECODE_INVAL)
5060
        if existing_node.primary_ip != primary_ip:
5061
          self.changed_primary_ip = True
5062

    
5063
        continue
5064

    
5065
      if (existing_node.primary_ip == primary_ip or
5066
          existing_node.secondary_ip == primary_ip or
5067
          existing_node.primary_ip == secondary_ip or
5068
          existing_node.secondary_ip == secondary_ip):
5069
        raise errors.OpPrereqError("New node ip address(es) conflict with"
5070
                                   " existing node %s" % existing_node.name,
5071
                                   errors.ECODE_NOTUNIQUE)
5072

    
5073
    # After this 'if' block, None is no longer a valid value for the
5074
    # _capable op attributes
5075
    if self.op.readd:
5076
      old_node = self.cfg.GetNodeInfo(node)
5077
      assert old_node is not None, "Can't retrieve locked node %s" % node
5078
      for attr in self._NFLAGS:
5079
        if getattr(self.op, attr) is None:
5080
          setattr(self.op, attr, getattr(old_node, attr))
5081
    else:
5082
      for attr in self._NFLAGS:
5083
        if getattr(self.op, attr) is None:
5084
          setattr(self.op, attr, True)
5085

    
5086
    if self.op.readd and not self.op.vm_capable:
5087
      pri, sec = cfg.GetNodeInstances(node)
5088
      if pri or sec:
5089
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5090
                                   " flag set to false, but it already holds"
5091
                                   " instances" % node,
5092
                                   errors.ECODE_STATE)
5093

    
5094
    # check that the type of the node (single versus dual homed) is the
5095
    # same as for the master
5096
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5097
    master_singlehomed = myself.secondary_ip == myself.primary_ip
5098
    newbie_singlehomed = secondary_ip == primary_ip
5099
    if master_singlehomed != newbie_singlehomed:
5100
      if master_singlehomed:
5101
        raise errors.OpPrereqError("The master has no secondary ip but the"
5102
                                   " new node has one",
5103
                                   errors.ECODE_INVAL)
5104
      else:
5105
        raise errors.OpPrereqError("The master has a secondary ip but the"
5106
                                   " new node doesn't have one",
5107
                                   errors.ECODE_INVAL)
5108

    
5109
    # checks reachability
5110
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5111
      raise errors.OpPrereqError("Node not reachable by ping",
5112
                                 errors.ECODE_ENVIRON)
5113

    
5114
    if not newbie_singlehomed:
5115
      # check reachability from my secondary ip to newbie's secondary ip
5116
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5117
                           source=myself.secondary_ip):
5118
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5119
                                   " based ping to node daemon port",
5120
                                   errors.ECODE_ENVIRON)
5121

    
5122
    if self.op.readd:
5123
      exceptions = [node]
5124
    else:
5125
      exceptions = []
5126

    
5127
    if self.op.master_capable:
5128
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5129
    else:
5130
      self.master_candidate = False
5131

    
5132
    if self.op.readd:
5133
      self.new_node = old_node
5134
    else:
5135
      node_group = cfg.LookupNodeGroup(self.op.group)
5136
      self.new_node = objects.Node(name=node,
5137
                                   primary_ip=primary_ip,
5138
                                   secondary_ip=secondary_ip,
5139
                                   master_candidate=self.master_candidate,
5140
                                   offline=False, drained=False,
5141
                                   group=node_group)
5142

    
5143
    if self.op.ndparams:
5144
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5145

    
5146
  def Exec(self, feedback_fn):
5147
    """Adds the new node to the cluster.
5148

5149
    """
5150
    new_node = self.new_node
5151
    node = new_node.name
5152

    
5153
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5154
      "Not owning BGL"
5155

    
5156
    # We adding a new node so we assume it's powered
5157
    new_node.powered = True
5158

    
5159
    # for re-adds, reset the offline/drained/master-candidate flags;
5160
    # we need to reset here, otherwise offline would prevent RPC calls
5161
    # later in the procedure; this also means that if the re-add
5162
    # fails, we are left with a non-offlined, broken node
5163
    if self.op.readd:
5164
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5165
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5166
      # if we demote the node, we do cleanup later in the procedure
5167
      new_node.master_candidate = self.master_candidate
5168
      if self.changed_primary_ip:
5169
        new_node.primary_ip = self.op.primary_ip
5170

    
5171
    # copy the master/vm_capable flags
5172
    for attr in self._NFLAGS:
5173
      setattr(new_node, attr, getattr(self.op, attr))
5174

    
5175
    # notify the user about any possible mc promotion
5176
    if new_node.master_candidate:
5177
      self.LogInfo("Node will be a master candidate")
5178

    
5179
    if self.op.ndparams:
5180
      new_node.ndparams = self.op.ndparams
5181
    else:
5182
      new_node.ndparams = {}
5183

    
5184
    # check connectivity
5185
    result = self.rpc.call_version([node])[node]
5186
    result.Raise("Can't get version information from node %s" % node)
5187
    if constants.PROTOCOL_VERSION == result.payload:
5188
      logging.info("Communication to node %s fine, sw version %s match",
5189
                   node, result.payload)
5190
    else:
5191
      raise errors.OpExecError("Version mismatch master version %s,"
5192
                               " node version %s" %
5193
                               (constants.PROTOCOL_VERSION, result.payload))
5194

    
5195
    # Add node to our /etc/hosts, and add key to known_hosts
5196
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5197
      master_node = self.cfg.GetMasterNode()
5198
      result = self.rpc.call_etc_hosts_modify(master_node,
5199
                                              constants.ETC_HOSTS_ADD,
5200
                                              self.hostname.name,
5201
                                              self.hostname.ip)
5202
      result.Raise("Can't update hosts file with new host data")
5203

    
5204
    if new_node.secondary_ip != new_node.primary_ip:
5205
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5206
                               False)
5207

    
5208
    node_verify_list = [self.cfg.GetMasterNode()]
5209
    node_verify_param = {
5210
      constants.NV_NODELIST: ([node], {}),
5211
      # TODO: do a node-net-test as well?
5212
    }
5213

    
5214
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5215
                                       self.cfg.GetClusterName())
5216
    for verifier in node_verify_list:
5217
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5218
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5219
      if nl_payload:
5220
        for failed in nl_payload:
5221
          feedback_fn("ssh/hostname verification failed"
5222
                      " (checking from %s): %s" %
5223
                      (verifier, nl_payload[failed]))
5224
        raise errors.OpExecError("ssh/hostname verification failed")
5225

    
5226
    if self.op.readd:
5227
      _RedistributeAncillaryFiles(self)
5228
      self.context.ReaddNode(new_node)
5229
      # make sure we redistribute the config
5230
      self.cfg.Update(new_node, feedback_fn)
5231
      # and make sure the new node will not have old files around
5232
      if not new_node.master_candidate:
5233
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5234
        msg = result.fail_msg
5235
        if msg:
5236
          self.LogWarning("Node failed to demote itself from master"
5237
                          " candidate status: %s" % msg)
5238
    else:
5239
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5240
                                  additional_vm=self.op.vm_capable)
5241
      self.context.AddNode(new_node, self.proc.GetECId())
5242

    
5243

    
5244
class LUNodeSetParams(LogicalUnit):
5245
  """Modifies the parameters of a node.
5246

5247
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5248
      to the node role (as _ROLE_*)
5249
  @cvar _R2F: a dictionary from node role to tuples of flags
5250
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5251

5252
  """
5253
  HPATH = "node-modify"
5254
  HTYPE = constants.HTYPE_NODE
5255
  REQ_BGL = False
5256
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5257
  _F2R = {
5258
    (True, False, False): _ROLE_CANDIDATE,
5259
    (False, True, False): _ROLE_DRAINED,
5260
    (False, False, True): _ROLE_OFFLINE,
5261
    (False, False, False): _ROLE_REGULAR,
5262
    }
5263
  _R2F = dict((v, k) for k, v in _F2R.items())
5264
  _FLAGS = ["master_candidate", "drained", "offline"]
5265

    
5266
  def CheckArguments(self):
5267
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5268
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5269
                self.op.master_capable, self.op.vm_capable,
5270
                self.op.secondary_ip, self.op.ndparams]
5271
    if all_mods.count(None) == len(all_mods):
5272
      raise errors.OpPrereqError("Please pass at least one modification",
5273
                                 errors.ECODE_INVAL)
5274
    if all_mods.count(True) > 1:
5275
      raise errors.OpPrereqError("Can't set the node into more than one"
5276
                                 " state at the same time",
5277
                                 errors.ECODE_INVAL)
5278

    
5279
    # Boolean value that tells us whether we might be demoting from MC
5280
    self.might_demote = (self.op.master_candidate == False or
5281
                         self.op.offline == True or
5282
                         self.op.drained == True or
5283
                         self.op.master_capable == False)
5284

    
5285
    if self.op.secondary_ip:
5286
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5287
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5288
                                   " address" % self.op.secondary_ip,
5289
                                   errors.ECODE_INVAL)
5290

    
5291
    self.lock_all = self.op.auto_promote and self.might_demote
5292
    self.lock_instances = self.op.secondary_ip is not None
5293

    
5294
  def _InstanceFilter(self, instance):
5295
    """Filter for getting affected instances.
5296

5297
    """
5298
    return (instance.disk_template in constants.DTS_INT_MIRROR and
5299
            self.op.node_name in instance.all_nodes)
5300

    
5301
  def ExpandNames(self):
5302
    if self.lock_all:
5303
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5304
    else:
5305
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5306

    
5307
    if self.lock_instances:
5308
      self.needed_locks[locking.LEVEL_INSTANCE] = \
5309
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5310

    
5311
  def BuildHooksEnv(self):
5312
    """Build hooks env.
5313

5314
    This runs on the master node.
5315

5316
    """
5317
    return {
5318
      "OP_TARGET": self.op.node_name,
5319
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5320
      "OFFLINE": str(self.op.offline),
5321
      "DRAINED": str(self.op.drained),
5322
      "MASTER_CAPABLE": str(self.op.master_capable),
5323
      "VM_CAPABLE": str(self.op.vm_capable),
5324
      }
5325

    
5326
  def BuildHooksNodes(self):
5327
    """Build hooks nodes.
5328

5329
    """
5330
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5331
    return (nl, nl)
5332

    
5333
  def CheckPrereq(self):
5334
    """Check prerequisites.
5335

5336
    This only checks the instance list against the existing names.
5337

5338
    """
5339
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5340

    
5341
    if self.lock_instances:
5342
      affected_instances = \
5343
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5344

    
5345
      # Verify instance locks
5346
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5347
      wanted_instances = frozenset(affected_instances.keys())
5348
      if wanted_instances - owned_instances:
5349
        raise errors.OpPrereqError("Instances affected by changing node %s's"
5350
                                   " secondary IP address have changed since"
5351
                                   " locks were acquired, wanted '%s', have"
5352
                                   " '%s'; retry the operation" %
5353
                                   (self.op.node_name,
5354
                                    utils.CommaJoin(wanted_instances),
5355
                                    utils.CommaJoin(owned_instances)),
5356
                                   errors.ECODE_STATE)
5357
    else:
5358
      affected_instances = None
5359

    
5360
    if (self.op.master_candidate is not None or
5361
        self.op.drained is not None or
5362
        self.op.offline is not None):
5363
      # we can't change the master's node flags
5364
      if self.op.node_name == self.cfg.GetMasterNode():
5365
        raise errors.OpPrereqError("The master role can be changed"
5366
                                   " only via master-failover",
5367
                                   errors.ECODE_INVAL)
5368

    
5369
    if self.op.master_candidate and not node.master_capable:
5370
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5371
                                 " it a master candidate" % node.name,
5372
                                 errors.ECODE_STATE)
5373

    
5374
    if self.op.vm_capable == False:
5375
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5376
      if ipri or isec:
5377
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5378
                                   " the vm_capable flag" % node.name,
5379
                                   errors.ECODE_STATE)
5380

    
5381
    if node.master_candidate and self.might_demote and not self.lock_all:
5382
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5383
      # check if after removing the current node, we're missing master
5384
      # candidates
5385
      (mc_remaining, mc_should, _) = \
5386
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5387
      if mc_remaining < mc_should:
5388
        raise errors.OpPrereqError("Not enough master candidates, please"
5389
                                   " pass auto promote option to allow"
5390
                                   " promotion", errors.ECODE_STATE)
5391

    
5392
    self.old_flags = old_flags = (node.master_candidate,
5393
                                  node.drained, node.offline)
5394
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5395
    self.old_role = old_role = self._F2R[old_flags]
5396

    
5397
    # Check for ineffective changes
5398
    for attr in self._FLAGS:
5399
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5400
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5401
        setattr(self.op, attr, None)
5402

    
5403
    # Past this point, any flag change to False means a transition
5404
    # away from the respective state, as only real changes are kept
5405

    
5406
    # TODO: We might query the real power state if it supports OOB
5407
    if _SupportsOob(self.cfg, node):
5408
      if self.op.offline is False and not (node.powered or
5409
                                           self.op.powered == True):
5410
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5411
                                    " offline status can be reset") %
5412
                                   self.op.node_name)
5413
    elif self.op.powered is not None:
5414
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5415
                                  " as it does not support out-of-band"
5416
                                  " handling") % self.op.node_name)
5417

    
5418
    # If we're being deofflined/drained, we'll MC ourself if needed
5419
    if (self.op.drained == False or self.op.offline == False or
5420
        (self.op.master_capable and not node.master_capable)):
5421
      if _DecideSelfPromotion(self):
5422
        self.op.master_candidate = True
5423
        self.LogInfo("Auto-promoting node to master candidate")
5424

    
5425
    # If we're no longer master capable, we'll demote ourselves from MC
5426
    if self.op.master_capable == False and node.master_candidate:
5427
      self.LogInfo("Demoting from master candidate")
5428
      self.op.master_candidate = False
5429

    
5430
    # Compute new role
5431
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5432
    if self.op.master_candidate:
5433
      new_role = self._ROLE_CANDIDATE
5434
    elif self.op.drained:
5435
      new_role = self._ROLE_DRAINED
5436
    elif self.op.offline:
5437
      new_role = self._ROLE_OFFLINE
5438
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5439
      # False is still in new flags, which means we're un-setting (the
5440
      # only) True flag
5441
      new_role = self._ROLE_REGULAR
5442
    else: # no new flags, nothing, keep old role
5443
      new_role = old_role
5444

    
5445
    self.new_role = new_role
5446

    
5447
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5448
      # Trying to transition out of offline status
5449
      # TODO: Use standard RPC runner, but make sure it works when the node is
5450
      # still marked offline
5451
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5452
      if result.fail_msg:
5453
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5454
                                   " to report its version: %s" %
5455
                                   (node.name, result.fail_msg),
5456
                                   errors.ECODE_STATE)
5457
      else:
5458
        self.LogWarning("Transitioning node from offline to online state"
5459
                        " without using re-add. Please make sure the node"
5460
                        " is healthy!")
5461

    
5462
    if self.op.secondary_ip:
5463
      # Ok even without locking, because this can't be changed by any LU
5464
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5465
      master_singlehomed = master.secondary_ip == master.primary_ip
5466
      if master_singlehomed and self.op.secondary_ip:
5467
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5468
                                   " homed cluster", errors.ECODE_INVAL)
5469

    
5470
      assert not (frozenset(affected_instances) -
5471
                  self.owned_locks(locking.LEVEL_INSTANCE))
5472

    
5473
      if node.offline:
5474
        if affected_instances:
5475
          raise errors.OpPrereqError("Cannot change secondary IP address:"
5476
                                     " offline node has instances (%s)"
5477
                                     " configured to use it" %
5478
                                     utils.CommaJoin(affected_instances.keys()))
5479
      else:
5480
        # On online nodes, check that no instances are running, and that
5481
        # the node has the new ip and we can reach it.
5482
        for instance in affected_instances.values():
5483
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload

class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result

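# Illustrative sketch (hypothetical values, not part of the original code
# path): the os_hvp filtering in LUClusterQuery.Exec keeps only the per-OS
# hypervisor overrides whose hypervisor is currently enabled, e.g.
#   cluster.os_hvp              == {"debian": {"xen-pvm": {...}, "kvm": {...}}}
#   cluster.enabled_hypervisors == ["kvm"]
#   => os_hvp                   == {"debian": {"kvm": {...}}}
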
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Collect and return the requested cluster configuration values.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values

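# Illustrative sketch (hypothetical values): LUClusterConfigQuery returns one
# entry per requested field, in the order the fields were requested, e.g.
#   self.op.output_fields = ["cluster_name", "master_node", "drain_flag"]
#   => ["cluster.example.com", "node1.example.com", False]
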
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a pair (disks_ok, device_info), where device_info is a list of
      tuples (host, instance_visible_name, node_visible_name) with the
      mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info

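# Illustrative sketch (hypothetical values): _AssembleInstanceDisks returns a
# (disks_ok, device_info) pair, where device_info maps each instance disk to
# the device path assembled on the primary node, e.g.
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0"),
#           ("node1.example.com", "disk/1", "/dev/drbd1")])
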
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks

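# Illustrative usage sketch (not part of the original code path):
#   _ExpandCheckDisks(instance, None)                # act on all instance disks
#   _ExpandCheckDisks(instance, instance.disks[:1])  # act on the first disk only
# Passing disks that do not belong to the instance raises ProgrammerError.
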
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored, i.e. they cause the function to return False.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result

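# Illustrative usage sketch (not part of the original code path):
#   _ShutdownInstanceDisks(lu, instance)                       # all disks, all nodes
#   _ShutdownInstanceDisks(lu, instance, ignore_primary=True)  # tolerate primary errors
# The return value is False only when a shutdown failed on the primary node
# (and ignore_primary is false) or on an online secondary node.
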
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)

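# Illustrative usage sketch (hypothetical values): reserve 2048 MiB on the
# primary node before starting an instance; raises OpPrereqError on shortage.
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        2048, instance.hypervisor)
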
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)

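# Illustrative sketch (hypothetical values): req_sizes maps each volume group
# to the total space in MiB that must be free on every node, e.g.
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"],
#                            {"xenvg": 10240, "backupvg": 2048})
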
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has less CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    num_cpus = info.payload.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)

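# Illustrative usage sketch (hypothetical values): require at least 4 physical
# CPUs on every candidate node for the given hypervisor.
#   _CheckNodesPhysicalCPUs(self, nodenames, 4, instance.hypervisor)
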
class LUInstanceStartup(LogicalUnit):
6061
  """Starts an instance.
6062

6063
  """
6064
  HPATH = "instance-start"
6065
  HTYPE = constants.HTYPE_INSTANCE
6066
  REQ_BGL = False
6067

    
6068
  def CheckArguments(self):
6069
    # extra beparams
6070
    if self.op.beparams:
6071
      # fill the beparams dict
6072
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6073

    
6074
  def ExpandNames(self):
6075
    self._ExpandAndLockInstance()
6076

    
6077
  def BuildHooksEnv(self):
6078
    """Build hooks env.
6079

6080
    This runs on master, primary and secondary nodes of the instance.
6081

6082
    """
6083
    env = {
6084
      "FORCE": self.op.force,
6085
      }
6086

    
6087
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6088

    
6089
    return env
6090

    
6091
  def BuildHooksNodes(self):
6092
    """Build hooks nodes.
6093

6094
    """
6095
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6096
    return (nl, nl)
6097

    
6098
  def CheckPrereq(self):
6099
    """Check prerequisites.
6100

6101
    This checks that the instance is in the cluster.
6102

6103
    """
6104
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6105
    assert self.instance is not None, \
6106
      "Cannot retrieve locked instance %s" % self.op.instance_name
6107

    
6108
    # extra hvparams
6109
    if self.op.hvparams:
6110
      # check hypervisor parameter syntax (locally)
6111
      cluster = self.cfg.GetClusterInfo()
6112
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6113
      filled_hvp = cluster.FillHV(instance)
6114
      filled_hvp.update(self.op.hvparams)
6115
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6116
      hv_type.CheckParameterSyntax(filled_hvp)
6117
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6118

    
6119
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6120

    
6121
    if self.primary_offline and self.op.ignore_offline_nodes:
6122
      self.proc.LogWarning("Ignoring offline primary node")
6123

    
6124
      if self.op.hvparams or self.op.beparams:
6125
        self.proc.LogWarning("Overridden parameters are ignored")
6126
    else:
6127
      _CheckNodeOnline(self, instance.primary_node)
6128

    
6129
      bep = self.cfg.GetClusterInfo().FillBE(instance)
6130

    
6131
      # check bridges existence
6132
      _CheckInstanceBridgesExist(self, instance)
6133

    
6134
      remote_info = self.rpc.call_instance_info(instance.primary_node,
6135
                                                instance.name,
6136
                                                instance.hypervisor)
6137
      remote_info.Raise("Error checking node %s" % instance.primary_node,
6138
                        prereq=True, ecode=errors.ECODE_ENVIRON)
6139
      if not remote_info.payload: # not running already
6140
        _CheckNodeFreeMemory(self, instance.primary_node,
6141
                             "starting instance %s" % instance.name,
6142
                             bep[constants.BE_MEMORY], instance.hypervisor)
6143

    
6144
  def Exec(self, feedback_fn):
6145
    """Start the instance.
6146

6147
    """
6148
    instance = self.instance
6149
    force = self.op.force
6150

    
6151
    if not self.op.no_remember:
6152
      self.cfg.MarkInstanceUp(instance.name)
6153

    
6154
    if self.primary_offline:
6155
      assert self.op.ignore_offline_nodes
6156
      self.proc.LogInfo("Primary node offline, marked instance as started")
6157
    else:
6158
      node_current = instance.primary_node
6159

    
6160
      _StartInstanceDisks(self, instance, force)
6161

    
6162
      result = \
6163
        self.rpc.call_instance_start(node_current,
6164
                                     (instance, self.op.hvparams,
6165
                                      self.op.beparams),
6166
                                     self.op.startup_paused)
6167
      msg = result.fail_msg
6168
      if msg:
6169
        _ShutdownInstanceDisks(self, instance)
6170
        raise errors.OpExecError("Could not start instance: %s" % msg)
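# Illustrative sketch (hypothetical values): in LUInstanceStartup.CheckPrereq
# the one-off hvparams from the opcode are layered on top of the cluster/OS
# defaults before syntax checking, e.g.
#   cluster.FillHV(instance)  -> {"kernel_path": "/boot/vmlinuz", ...}
#   self.op.hvparams          -> {"kernel_path": "/boot/vmlinuz-test"}
#   filled_hvp after update() -> {"kernel_path": "/boot/vmlinuz-test", ...}
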
class LUInstanceReboot(LogicalUnit):
6174
  """Reboot an instance.
6175

6176
  """
6177
  HPATH = "instance-reboot"
6178
  HTYPE = constants.HTYPE_INSTANCE
6179
  REQ_BGL = False
6180

    
6181
  def ExpandNames(self):
6182
    self._ExpandAndLockInstance()
6183

    
6184
  def BuildHooksEnv(self):
6185
    """Build hooks env.
6186

6187
    This runs on master, primary and secondary nodes of the instance.
6188

6189
    """
6190
    env = {
6191
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6192
      "REBOOT_TYPE": self.op.reboot_type,
6193
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6194
      }
6195

    
6196
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6197

    
6198
    return env
6199

    
6200
  def BuildHooksNodes(self):
6201
    """Build hooks nodes.
6202

6203
    """
6204
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6205
    return (nl, nl)
6206

    
6207
  def CheckPrereq(self):
6208
    """Check prerequisites.
6209

6210
    This checks that the instance is in the cluster.
6211

6212
    """
6213
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6214
    assert self.instance is not None, \
6215
      "Cannot retrieve locked instance %s" % self.op.instance_name
6216

    
6217
    _CheckNodeOnline(self, instance.primary_node)
6218

    
6219
    # check bridges existence
6220
    _CheckInstanceBridgesExist(self, instance)
6221

    
6222
  def Exec(self, feedback_fn):
6223
    """Reboot the instance.
6224

6225
    """
6226
    instance = self.instance
6227
    ignore_secondaries = self.op.ignore_secondaries
6228
    reboot_type = self.op.reboot_type
6229

    
6230
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6231
                                              instance.name,
6232
                                              instance.hypervisor)
6233
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6234
    instance_running = bool(remote_info.payload)
6235

    
6236
    node_current = instance.primary_node
6237

    
6238
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6239
                                            constants.INSTANCE_REBOOT_HARD]:
6240
      for disk in instance.disks:
6241
        self.cfg.SetDiskID(disk, node_current)
6242
      result = self.rpc.call_instance_reboot(node_current, instance,
6243
                                             reboot_type,
6244
                                             self.op.shutdown_timeout)
6245
      result.Raise("Could not reboot instance")
6246
    else:
6247
      if instance_running:
6248
        result = self.rpc.call_instance_shutdown(node_current, instance,
6249
                                                 self.op.shutdown_timeout)
6250
        result.Raise("Could not shutdown instance for full reboot")
6251
        _ShutdownInstanceDisks(self, instance)
6252
      else:
6253
        self.LogInfo("Instance %s was already stopped, starting now",
6254
                     instance.name)
6255
      _StartInstanceDisks(self, instance, ignore_secondaries)
6256
      result = self.rpc.call_instance_start(node_current,
6257
                                            (instance, None, None), False)
6258
      msg = result.fail_msg
6259
      if msg:
6260
        _ShutdownInstanceDisks(self, instance)
6261
        raise errors.OpExecError("Could not start instance for"
6262
                                 " full reboot: %s" % msg)
6263

    
6264
    self.cfg.MarkInstanceUp(instance.name)
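# Illustrative summary (no new behaviour): LUInstanceReboot.Exec delegates
# soft/hard reboots of a running instance to call_instance_reboot, while any
# other reboot type (a full reboot), or a reboot of a stopped instance, is
# implemented as shutdown, disk deactivation/reactivation and a fresh
# call_instance_start.
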
class LUInstanceShutdown(LogicalUnit):
6268
  """Shutdown an instance.
6269

6270
  """
6271
  HPATH = "instance-stop"
6272
  HTYPE = constants.HTYPE_INSTANCE
6273
  REQ_BGL = False
6274

    
6275
  def ExpandNames(self):
6276
    self._ExpandAndLockInstance()
6277

    
6278
  def BuildHooksEnv(self):
6279
    """Build hooks env.
6280

6281
    This runs on master, primary and secondary nodes of the instance.
6282

6283
    """
6284
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6285
    env["TIMEOUT"] = self.op.timeout
6286
    return env
6287

    
6288
  def BuildHooksNodes(self):
6289
    """Build hooks nodes.
6290

6291
    """
6292
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6293
    return (nl, nl)
6294

    
6295
  def CheckPrereq(self):
6296
    """Check prerequisites.
6297

6298
    This checks that the instance is in the cluster.
6299

6300
    """
6301
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6302
    assert self.instance is not None, \
6303
      "Cannot retrieve locked instance %s" % self.op.instance_name
6304

    
6305
    self.primary_offline = \
6306
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6307

    
6308
    if self.primary_offline and self.op.ignore_offline_nodes:
6309
      self.proc.LogWarning("Ignoring offline primary node")
6310
    else:
6311
      _CheckNodeOnline(self, self.instance.primary_node)
6312

    
6313
  def Exec(self, feedback_fn):
6314
    """Shutdown the instance.
6315

6316
    """
6317
    instance = self.instance
6318
    node_current = instance.primary_node
6319
    timeout = self.op.timeout
6320

    
6321
    if not self.op.no_remember:
6322
      self.cfg.MarkInstanceDown(instance.name)
6323

    
6324
    if self.primary_offline:
6325
      assert self.op.ignore_offline_nodes
6326
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6327
    else:
6328
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6329
      msg = result.fail_msg
6330
      if msg:
6331
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6332

    
6333
      _ShutdownInstanceDisks(self, instance)
6334

    
6335

    
6336
class LUInstanceReinstall(LogicalUnit):
6337
  """Reinstall an instance.
6338

6339
  """
6340
  HPATH = "instance-reinstall"
6341
  HTYPE = constants.HTYPE_INSTANCE
6342
  REQ_BGL = False
6343

    
6344
  def ExpandNames(self):
6345
    self._ExpandAndLockInstance()
6346

    
6347
  def BuildHooksEnv(self):
6348
    """Build hooks env.
6349

6350
    This runs on master, primary and secondary nodes of the instance.
6351

6352
    """
6353
    return _BuildInstanceHookEnvByObject(self, self.instance)
6354

    
6355
  def BuildHooksNodes(self):
6356
    """Build hooks nodes.
6357

6358
    """
6359
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6360
    return (nl, nl)
6361

    
6362
  def CheckPrereq(self):
6363
    """Check prerequisites.
6364

6365
    This checks that the instance is in the cluster and is not running.
6366

6367
    """
6368
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6369
    assert instance is not None, \
6370
      "Cannot retrieve locked instance %s" % self.op.instance_name
6371
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6372
                     " offline, cannot reinstall")
6373
    for node in instance.secondary_nodes:
6374
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6375
                       " cannot reinstall")
6376

    
6377
    if instance.disk_template == constants.DT_DISKLESS:
6378
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6379
                                 self.op.instance_name,
6380
                                 errors.ECODE_INVAL)
6381
    _CheckInstanceDown(self, instance, "cannot reinstall")
6382

    
6383
    if self.op.os_type is not None:
6384
      # OS verification
6385
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6386
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6387
      instance_os = self.op.os_type
6388
    else:
6389
      instance_os = instance.os
6390

    
6391
    nodelist = list(instance.all_nodes)
6392

    
6393
    if self.op.osparams:
6394
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6395
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6396
      self.os_inst = i_osdict # the new dict (without defaults)
6397
    else:
6398
      self.os_inst = None
6399

    
6400
    self.instance = instance
6401

    
6402
  def Exec(self, feedback_fn):
6403
    """Reinstall the instance.
6404

6405
    """
6406
    inst = self.instance
6407

    
6408
    if self.op.os_type is not None:
6409
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6410
      inst.os = self.op.os_type
6411
      # Write to configuration
6412
      self.cfg.Update(inst, feedback_fn)
6413

    
6414
    _StartInstanceDisks(self, inst, None)
6415
    try:
6416
      feedback_fn("Running the instance OS create scripts...")
6417
      # FIXME: pass debug option from opcode to backend
6418
      result = self.rpc.call_instance_os_add(inst.primary_node,
6419
                                             (inst, self.os_inst), True,
6420
                                             self.op.debug_level)
6421
      result.Raise("Could not install OS for instance %s on node %s" %
6422
                   (inst.name, inst.primary_node))
6423
    finally:
6424
      _ShutdownInstanceDisks(self, inst)
6425

    
6426

    
6427
class LUInstanceRecreateDisks(LogicalUnit):
6428
  """Recreate an instance's missing disks.
6429

6430
  """
6431
  HPATH = "instance-recreate-disks"
6432
  HTYPE = constants.HTYPE_INSTANCE
6433
  REQ_BGL = False
6434

    
6435
  def CheckArguments(self):
6436
    # normalise the disk list
6437
    self.op.disks = sorted(frozenset(self.op.disks))
6438

    
6439
  def ExpandNames(self):
6440
    self._ExpandAndLockInstance()
6441
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6442
    if self.op.nodes:
6443
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6444
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6445
    else:
6446
      self.needed_locks[locking.LEVEL_NODE] = []
6447

    
6448
  def DeclareLocks(self, level):
6449
    if level == locking.LEVEL_NODE:
6450
      # if we replace the nodes, we only need to lock the old primary,
6451
      # otherwise we need to lock all nodes for disk re-creation
6452
      primary_only = bool(self.op.nodes)
6453
      self._LockInstancesNodes(primary_only=primary_only)
6454

    
6455
  def BuildHooksEnv(self):
6456
    """Build hooks env.
6457

6458
    This runs on master, primary and secondary nodes of the instance.
6459

6460
    """
6461
    return _BuildInstanceHookEnvByObject(self, self.instance)
6462

    
6463
  def BuildHooksNodes(self):
6464
    """Build hooks nodes.
6465

6466
    """
6467
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6468
    return (nl, nl)
6469

    
6470
  def CheckPrereq(self):
6471
    """Check prerequisites.
6472

6473
    This checks that the instance is in the cluster and is not running.
6474

6475
    """
6476
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6477
    assert instance is not None, \
6478
      "Cannot retrieve locked instance %s" % self.op.instance_name
6479
    if self.op.nodes:
6480
      if len(self.op.nodes) != len(instance.all_nodes):
6481
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6482
                                   " %d replacement nodes were specified" %
6483
                                   (instance.name, len(instance.all_nodes),
6484
                                    len(self.op.nodes)),
6485
                                   errors.ECODE_INVAL)
6486
      assert instance.disk_template != constants.DT_DRBD8 or \
6487
          len(self.op.nodes) == 2
6488
      assert instance.disk_template != constants.DT_PLAIN or \
6489
          len(self.op.nodes) == 1
6490
      primary_node = self.op.nodes[0]
6491
    else:
6492
      primary_node = instance.primary_node
6493
    _CheckNodeOnline(self, primary_node)
6494

    
6495
    if instance.disk_template == constants.DT_DISKLESS:
6496
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6497
                                 self.op.instance_name, errors.ECODE_INVAL)
6498
    # if we replace nodes *and* the old primary is offline, we don't
6499
    # check
6500
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6501
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6502
    if not (self.op.nodes and old_pnode.offline):
6503
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6504

    
6505
    if not self.op.disks:
6506
      self.op.disks = range(len(instance.disks))
6507
    else:
6508
      for idx in self.op.disks:
6509
        if idx >= len(instance.disks):
6510
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6511
                                     errors.ECODE_INVAL)
6512
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6513
      raise errors.OpPrereqError("Can't recreate disks partially and"
6514
                                 " change the nodes at the same time",
6515
                                 errors.ECODE_INVAL)
6516
    self.instance = instance
6517

    
6518
  def Exec(self, feedback_fn):
6519
    """Recreate the disks.
6520

6521
    """
6522
    instance = self.instance
6523

    
6524
    to_skip = []
6525
    mods = [] # keeps track of needed logical_id changes
6526

    
6527
    for idx, disk in enumerate(instance.disks):
6528
      if idx not in self.op.disks: # disk idx has not been passed in
6529
        to_skip.append(idx)
6530
        continue
6531
      # update secondaries for disks, if needed
6532
      if self.op.nodes:
6533
        if disk.dev_type == constants.LD_DRBD8:
6534
          # need to update the nodes and minors
6535
          assert len(self.op.nodes) == 2
6536
          assert len(disk.logical_id) == 6 # otherwise disk internals
6537
                                           # have changed
6538
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6539
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6540
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6541
                    new_minors[0], new_minors[1], old_secret)
6542
          assert len(disk.logical_id) == len(new_id)
6543
          mods.append((idx, new_id))
6544

    
6545
    # now that we have passed all asserts above, we can apply the mods
6546
    # in a single run (to avoid partial changes)
6547
    for idx, new_id in mods:
6548
      instance.disks[idx].logical_id = new_id
6549

    
6550
    # change primary node, if needed
6551
    if self.op.nodes:
6552
      instance.primary_node = self.op.nodes[0]
6553
      self.LogWarning("Changing the instance's nodes, you will have to"
6554
                      " remove any disks left on the older nodes manually")
6555

    
6556
    if self.op.nodes:
6557
      self.cfg.Update(instance, feedback_fn)
6558

    
6559
    _CreateDisks(self, instance, to_skip=to_skip)
6560

    
6561

    
6562
class LUInstanceRename(LogicalUnit):
6563
  """Rename an instance.
6564

6565
  """
6566
  HPATH = "instance-rename"
6567
  HTYPE = constants.HTYPE_INSTANCE
6568

    
6569
  def CheckArguments(self):
6570
    """Check arguments.
6571

6572
    """
6573
    if self.op.ip_check and not self.op.name_check:
6574
      # TODO: make the ip check more flexible and not depend on the name check
6575
      raise errors.OpPrereqError("IP address check requires a name check",
6576
                                 errors.ECODE_INVAL)
6577

    
6578
  def BuildHooksEnv(self):
6579
    """Build hooks env.
6580

6581
    This runs on master, primary and secondary nodes of the instance.
6582

6583
    """
6584
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6585
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6586
    return env
6587

    
6588
  def BuildHooksNodes(self):
6589
    """Build hooks nodes.
6590

6591
    """
6592
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6593
    return (nl, nl)
6594

    
6595
  def CheckPrereq(self):
6596
    """Check prerequisites.
6597

6598
    This checks that the instance is in the cluster and is not running.
6599

6600
    """
6601
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6602
                                                self.op.instance_name)
6603
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6604
    assert instance is not None
6605
    _CheckNodeOnline(self, instance.primary_node)
6606
    _CheckInstanceDown(self, instance, "cannot rename")
6607
    self.instance = instance
6608

    
6609
    new_name = self.op.new_name
6610
    if self.op.name_check:
6611
      hostname = netutils.GetHostname(name=new_name)
6612
      if hostname != new_name:
6613
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6614
                     hostname.name)
6615
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6616
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6617
                                    " same as given hostname '%s'") %
6618
                                    (hostname.name, self.op.new_name),
6619
                                    errors.ECODE_INVAL)
6620
      new_name = self.op.new_name = hostname.name
6621
      if (self.op.ip_check and
6622
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6623
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6624
                                   (hostname.ip, new_name),
6625
                                   errors.ECODE_NOTUNIQUE)
6626

    
6627
    instance_list = self.cfg.GetInstanceList()
6628
    if new_name in instance_list and new_name != instance.name:
6629
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6630
                                 new_name, errors.ECODE_EXISTS)
6631

    
6632
  def Exec(self, feedback_fn):
6633
    """Rename the instance.
6634

6635
    """
6636
    inst = self.instance
6637
    old_name = inst.name
6638

    
6639
    rename_file_storage = False
6640
    if (inst.disk_template in constants.DTS_FILEBASED and
6641
        self.op.new_name != inst.name):
6642
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6643
      rename_file_storage = True
6644

    
6645
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6646
    # Change the instance lock. This is definitely safe while we hold the BGL.
6647
    # Otherwise the new lock would have to be added in acquired mode.
6648
    assert self.REQ_BGL
6649
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6650
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6651

    
6652
    # re-read the instance from the configuration after rename
6653
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6654

    
6655
    if rename_file_storage:
6656
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6657
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6658
                                                     old_file_storage_dir,
6659
                                                     new_file_storage_dir)
6660
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6661
                   " (but the instance has been renamed in Ganeti)" %
6662
                   (inst.primary_node, old_file_storage_dir,
6663
                    new_file_storage_dir))
6664

    
6665
    _StartInstanceDisks(self, inst, None)
6666
    try:
6667
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6668
                                                 old_name, self.op.debug_level)
6669
      msg = result.fail_msg
6670
      if msg:
6671
        msg = ("Could not run OS rename script for instance %s on node %s"
6672
               " (but the instance has been renamed in Ganeti): %s" %
6673
               (inst.name, inst.primary_node, msg))
6674
        self.proc.LogWarning(msg)
6675
    finally:
6676
      _ShutdownInstanceDisks(self, inst)
6677

    
6678
    return inst.name
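# Illustrative sketch (hypothetical values): with name_check enabled, a short
# new_name is resolved before renaming, e.g.
#   netutils.GetHostname(name="web1") -> hostname.name == "web1.example.com"
# and the resolved FQDN becomes self.op.new_name; ip_check then refuses the
# rename if hostname.ip already answers on the node daemon port.
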
class LUInstanceRemove(LogicalUnit):
6682
  """Remove an instance.
6683

6684
  """
6685
  HPATH = "instance-remove"
6686
  HTYPE = constants.HTYPE_INSTANCE
6687
  REQ_BGL = False
6688

    
6689
  def ExpandNames(self):
6690
    self._ExpandAndLockInstance()
6691
    self.needed_locks[locking.LEVEL_NODE] = []
6692
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6693

    
6694
  def DeclareLocks(self, level):
6695
    if level == locking.LEVEL_NODE:
6696
      self._LockInstancesNodes()
6697

    
6698
  def BuildHooksEnv(self):
6699
    """Build hooks env.
6700

6701
    This runs on master, primary and secondary nodes of the instance.
6702

6703
    """
6704
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6705
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6706
    return env
6707

    
6708
  def BuildHooksNodes(self):
6709
    """Build hooks nodes.
6710

6711
    """
6712
    nl = [self.cfg.GetMasterNode()]
6713
    nl_post = list(self.instance.all_nodes) + nl
6714
    return (nl, nl_post)
6715

    
6716
  def CheckPrereq(self):
6717
    """Check prerequisites.
6718

6719
    This checks that the instance is in the cluster.
6720

6721
    """
6722
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6723
    assert self.instance is not None, \
6724
      "Cannot retrieve locked instance %s" % self.op.instance_name
6725

    
6726
  def Exec(self, feedback_fn):
6727
    """Remove the instance.
6728

6729
    """
6730
    instance = self.instance
6731
    logging.info("Shutting down instance %s on node %s",
6732
                 instance.name, instance.primary_node)
6733

    
6734
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6735
                                             self.op.shutdown_timeout)
6736
    msg = result.fail_msg
6737
    if msg:
6738
      if self.op.ignore_failures:
6739
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6740
      else:
6741
        raise errors.OpExecError("Could not shutdown instance %s on"
6742
                                 " node %s: %s" %
6743
                                 (instance.name, instance.primary_node, msg))
6744

    
6745
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6746

    
6747

    
6748
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6749
  """Utility function to remove an instance.
6750

6751
  """
6752
  logging.info("Removing block devices for instance %s", instance.name)
6753

    
6754
  if not _RemoveDisks(lu, instance):
6755
    if not ignore_failures:
6756
      raise errors.OpExecError("Can't remove instance's disks")
6757
    feedback_fn("Warning: can't remove instance's disks")
6758

    
6759
  logging.info("Removing instance %s out of cluster config", instance.name)
6760

    
6761
  lu.cfg.RemoveInstance(instance.name)
6762

    
6763
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6764
    "Instance lock removal conflict"
6765

    
6766
  # Remove lock for the instance
6767
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6768

    
6769

    
6770
class LUInstanceQuery(NoHooksLU):
6771
  """Logical unit for querying instances.
6772

6773
  """
6774
  # pylint: disable=W0142
6775
  REQ_BGL = False
6776

    
6777
  def CheckArguments(self):
6778
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6779
                             self.op.output_fields, self.op.use_locking)
6780

    
6781
  def ExpandNames(self):
6782
    self.iq.ExpandNames(self)
6783

    
6784
  def DeclareLocks(self, level):
6785
    self.iq.DeclareLocks(self, level)
6786

    
6787
  def Exec(self, feedback_fn):
6788
    return self.iq.OldStyleQuery(self)
6789

    
6790

    
6791
class LUInstanceFailover(LogicalUnit):
6792
  """Failover an instance.
6793

6794
  """
6795
  HPATH = "instance-failover"
6796
  HTYPE = constants.HTYPE_INSTANCE
6797
  REQ_BGL = False
6798

    
6799
  def CheckArguments(self):
6800
    """Check the arguments.
6801

6802
    """
6803
    self.iallocator = getattr(self.op, "iallocator", None)
6804
    self.target_node = getattr(self.op, "target_node", None)
6805

    
6806
  def ExpandNames(self):
6807
    self._ExpandAndLockInstance()
6808

    
6809
    if self.op.target_node is not None:
6810
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6811

    
6812
    self.needed_locks[locking.LEVEL_NODE] = []
6813
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6814

    
6815
    ignore_consistency = self.op.ignore_consistency
6816
    shutdown_timeout = self.op.shutdown_timeout
6817
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6818
                                       cleanup=False,
6819
                                       failover=True,
6820
                                       ignore_consistency=ignore_consistency,
6821
                                       shutdown_timeout=shutdown_timeout)
6822
    self.tasklets = [self._migrater]
6823

    
6824
  def DeclareLocks(self, level):
6825
    if level == locking.LEVEL_NODE:
6826
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6827
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6828
        if self.op.target_node is None:
6829
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6830
        else:
6831
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6832
                                                   self.op.target_node]
6833
        del self.recalculate_locks[locking.LEVEL_NODE]
6834
      else:
6835
        self._LockInstancesNodes()
6836

    
6837
  def BuildHooksEnv(self):
6838
    """Build hooks env.
6839

6840
    This runs on master, primary and secondary nodes of the instance.
6841

6842
    """
6843
    instance = self._migrater.instance
6844
    source_node = instance.primary_node
6845
    target_node = self.op.target_node
6846
    env = {
6847
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6848
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6849
      "OLD_PRIMARY": source_node,
6850
      "NEW_PRIMARY": target_node,
6851
      }
6852

    
6853
    if instance.disk_template in constants.DTS_INT_MIRROR:
6854
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6855
      env["NEW_SECONDARY"] = source_node
6856
    else:
6857
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6858

    
6859
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6860

    
6861
    return env
6862

    
6863
  def BuildHooksNodes(self):
6864
    """Build hooks nodes.
6865

6866
    """
6867
    instance = self._migrater.instance
6868
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6869
    return (nl, nl + [instance.primary_node])
6870

    
6871

    
6872
class LUInstanceMigrate(LogicalUnit):
6873
  """Migrate an instance.
6874

6875
  This is migration without shutting down, compared to the failover,
6876
  which is done with shutdown.
6877

6878
  """
6879
  HPATH = "instance-migrate"
6880
  HTYPE = constants.HTYPE_INSTANCE
6881
  REQ_BGL = False
6882

    
6883
  def ExpandNames(self):
6884
    self._ExpandAndLockInstance()
6885

    
6886
    if self.op.target_node is not None:
6887
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6888

    
6889
    self.needed_locks[locking.LEVEL_NODE] = []
6890
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6891

    
6892
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6893
                                       cleanup=self.op.cleanup,
6894
                                       failover=False,
6895
                                       fallback=self.op.allow_failover)
6896
    self.tasklets = [self._migrater]
6897

    
6898
  def DeclareLocks(self, level):
6899
    if level == locking.LEVEL_NODE:
6900
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6901
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6902
        if self.op.target_node is None:
6903
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6904
        else:
6905
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6906
                                                   self.op.target_node]
6907
        del self.recalculate_locks[locking.LEVEL_NODE]
6908
      else:
6909
        self._LockInstancesNodes()
6910

    
6911
  def BuildHooksEnv(self):
6912
    """Build hooks env.
6913

6914
    This runs on master, primary and secondary nodes of the instance.
6915

6916
    """
6917
    instance = self._migrater.instance
6918
    source_node = instance.primary_node
6919
    target_node = self.op.target_node
6920
    env = _BuildInstanceHookEnvByObject(self, instance)
6921
    env.update({
6922
      "MIGRATE_LIVE": self._migrater.live,
6923
      "MIGRATE_CLEANUP": self.op.cleanup,
6924
      "OLD_PRIMARY": source_node,
6925
      "NEW_PRIMARY": target_node,
6926
      })
6927

    
6928
    if instance.disk_template in constants.DTS_INT_MIRROR:
6929
      env["OLD_SECONDARY"] = target_node
6930
      env["NEW_SECONDARY"] = source_node
6931
    else:
6932
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6933

    
6934
    return env
6935

    
6936
  def BuildHooksNodes(self):
6937
    """Build hooks nodes.
6938

6939
    """
6940
    instance = self._migrater.instance
6941
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6942
    return (nl, nl + [instance.primary_node])
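# Illustrative summary (no new behaviour): for externally mirrored disk
# templates (DTS_EXT_MIRROR), failover and migration lock either all nodes or
# just [primary_node, target_node], whereas internally mirrored templates
# (e.g. DRBD) fall back to _LockInstancesNodes() and keep the usual lock
# recalculation.
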
class LUInstanceMove(LogicalUnit):
6946
  """Move an instance by data-copying.
6947

6948
  """
6949
  HPATH = "instance-move"
6950
  HTYPE = constants.HTYPE_INSTANCE
6951
  REQ_BGL = False
6952

    
6953
  def ExpandNames(self):
6954
    self._ExpandAndLockInstance()
6955
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6956
    self.op.target_node = target_node
6957
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6958
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6959

    
6960
  def DeclareLocks(self, level):
6961
    if level == locking.LEVEL_NODE:
6962
      self._LockInstancesNodes(primary_only=True)
6963

    
6964
  def BuildHooksEnv(self):
6965
    """Build hooks env.
6966

6967
    This runs on master, primary and secondary nodes of the instance.
6968

6969
    """
6970
    env = {
6971
      "TARGET_NODE": self.op.target_node,
6972
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6973
      }
6974
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6975
    return env
6976

    
6977
  def BuildHooksNodes(self):
6978
    """Build hooks nodes.
6979

6980
    """
6981
    nl = [
6982
      self.cfg.GetMasterNode(),
6983
      self.instance.primary_node,
6984
      self.op.target_node,
6985
      ]
6986
    return (nl, nl)
6987

    
6988
  def CheckPrereq(self):
6989
    """Check prerequisites.
6990

6991
    This checks that the instance is in the cluster.
6992

6993
    """
6994
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6995
    assert self.instance is not None, \
6996
      "Cannot retrieve locked instance %s" % self.op.instance_name
6997

    
6998
    node = self.cfg.GetNodeInfo(self.op.target_node)
6999
    assert node is not None, \
7000
      "Cannot retrieve locked node %s" % self.op.target_node
7001

    
7002
    self.target_node = target_node = node.name
7003

    
7004
    if target_node == instance.primary_node:
7005
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
7006
                                 (instance.name, target_node),
7007
                                 errors.ECODE_STATE)
7008

    
7009
    bep = self.cfg.GetClusterInfo().FillBE(instance)
7010

    
7011
    for idx, dsk in enumerate(instance.disks):
7012
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7013
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7014
                                   " cannot copy" % idx, errors.ECODE_STATE)
7015

    
7016
    _CheckNodeOnline(self, target_node)
7017
    _CheckNodeNotDrained(self, target_node)
7018
    _CheckNodeVmCapable(self, target_node)
7019

    
7020
    if instance.admin_up:
7021
      # check memory requirements on the target node
7022
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
7023
                           instance.name, bep[constants.BE_MEMORY],
7024
                           instance.hypervisor)
7025
    else:
7026
      self.LogInfo("Not checking memory on the secondary node as"
7027
                   " instance will not be started")
7028

    
7029
    # check bridge existance
7030
    _CheckInstanceBridgesExist(self, instance, node=target_node)
7031

    
7032
  def Exec(self, feedback_fn):
7033
    """Move an instance.
7034

7035
    The move is done by shutting it down on its present node, copying
7036
    the data over (slow) and starting it on the new node.
7037

7038
    """
7039
    instance = self.instance
7040

    
7041
    source_node = instance.primary_node
7042
    target_node = self.target_node
7043

    
7044
    self.LogInfo("Shutting down instance %s on source node %s",
7045
                 instance.name, source_node)
7046

    
7047
    result = self.rpc.call_instance_shutdown(source_node, instance,
7048
                                             self.op.shutdown_timeout)
7049
    msg = result.fail_msg
7050
    if msg:
7051
      if self.op.ignore_consistency:
7052
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
7053
                             " Proceeding anyway. Please make sure node"
7054
                             " %s is down. Error details: %s",
7055
                             instance.name, source_node, source_node, msg)
7056
      else:
7057
        raise errors.OpExecError("Could not shutdown instance %s on"
7058
                                 " node %s: %s" %
7059
                                 (instance.name, source_node, msg))
7060

    
7061
    # create the target disks
7062
    try:
7063
      _CreateDisks(self, instance, target_node=target_node)
7064
    except errors.OpExecError:
7065
      self.LogWarning("Device creation failed, reverting...")
7066
      try:
7067
        _RemoveDisks(self, instance, target_node=target_node)
7068
      finally:
7069
        self.cfg.ReleaseDRBDMinors(instance.name)
7070
        raise
7071

    
7072
    cluster_name = self.cfg.GetClusterInfo().cluster_name
7073

    
7074
    errs = []
7075
    # activate, get path, copy the data over
7076
    for idx, disk in enumerate(instance.disks):
7077
      self.LogInfo("Copying data for disk %d", idx)
7078
      result = self.rpc.call_blockdev_assemble(target_node, disk,
7079
                                               instance.name, True, idx)
7080
      if result.fail_msg:
7081
        self.LogWarning("Can't assemble newly created disk %d: %s",
7082
                        idx, result.fail_msg)
7083
        errs.append(result.fail_msg)
7084
        break
7085
      dev_path = result.payload
7086
      result = self.rpc.call_blockdev_export(source_node, disk,
7087
                                             target_node, dev_path,
7088
                                             cluster_name)
7089
      if result.fail_msg:
7090
        self.LogWarning("Can't copy data over for disk %d: %s",
7091
                        idx, result.fail_msg)
7092
        errs.append(result.fail_msg)
7093
        break
7094

    
7095
    if errs:
7096
      self.LogWarning("Some disks failed to copy, aborting")
7097
      try:
7098
        _RemoveDisks(self, instance, target_node=target_node)
7099
      finally:
7100
        self.cfg.ReleaseDRBDMinors(instance.name)
7101
        raise errors.OpExecError("Errors during disk copy: %s" %
7102
                                 (",".join(errs),))
7103

    
7104
    instance.primary_node = target_node
7105
    self.cfg.Update(instance, feedback_fn)
7106

    
7107
    self.LogInfo("Removing the disks on the original node")
7108
    _RemoveDisks(self, instance, target_node=source_node)
7109

    
7110
    # Only start the instance if it's marked as up
7111
    if instance.admin_up:
7112
      self.LogInfo("Starting instance %s on node %s",
7113
                   instance.name, target_node)
7114

    
7115
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7116
                                           ignore_secondaries=True)
7117
      if not disks_ok:
7118
        _ShutdownInstanceDisks(self, instance)
7119
        raise errors.OpExecError("Can't activate the instance's disks")
7120

    
7121
      result = self.rpc.call_instance_start(target_node,
7122
                                            (instance, None, None), False)
7123
      msg = result.fail_msg
7124
      if msg:
7125
        _ShutdownInstanceDisks(self, instance)
7126
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7127
                                 (instance.name, target_node, msg))
7128

    
7129

    
7130
class LUNodeMigrate(LogicalUnit):
7131
  """Migrate all instances from a node.
7132

7133
  """
7134
  HPATH = "node-migrate"
7135
  HTYPE = constants.HTYPE_NODE
7136
  REQ_BGL = False
7137

    
7138
  def CheckArguments(self):
7139
    pass
7140

    
7141
  def ExpandNames(self):
7142
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7143

    
7144
    self.share_locks = _ShareAll()
7145
    self.needed_locks = {
7146
      locking.LEVEL_NODE: [self.op.node_name],
7147
      }
7148

    
7149
  def BuildHooksEnv(self):
7150
    """Build hooks env.
7151

7152
    This runs on the master node only.
7153

7154
    """
7155
    return {
7156
      "NODE_NAME": self.op.node_name,
7157
      }
7158

    
7159
  def BuildHooksNodes(self):
7160
    """Build hooks nodes.
7161

7162
    """
7163
    nl = [self.cfg.GetMasterNode()]
7164
    return (nl, nl)
7165

    
7166
  def CheckPrereq(self):
7167
    pass
7168

    
7169
  def Exec(self, feedback_fn):
7170
    # Prepare jobs for migration instances
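    # (one single-opcode job is built per primary instance of the node, so
    # each migration is submitted and tracked independently)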
7171
    jobs = [
7172
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7173
                                 mode=self.op.mode,
7174
                                 live=self.op.live,
7175
                                 iallocator=self.op.iallocator,
7176
                                 target_node=self.op.target_node)]
7177
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7178
      ]
7179

    
7180
    # TODO: Run iallocator in this opcode and pass correct placement options to
7181
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7182
    # running the iallocator and the actual migration, a good consistency model
7183
    # will have to be found.
7184

    
7185
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7186
            frozenset([self.op.node_name]))
7187

    
7188
    return ResultWithJobs(jobs)
7189

    
7190

    
7191
class TLMigrateInstance(Tasklet):
7192
  """Tasklet class for instance migration.
7193

7194
  @type live: boolean
7195
  @ivar live: whether the migration will be done live or non-live;
7196
      this variable is initialized only after CheckPrereq has run
7197
  @type cleanup: boolean
7198
  @ivar cleanup: Whether we are cleaning up after a failed migration
7199
  @type iallocator: string
7200
  @ivar iallocator: The iallocator used to determine target_node
7201
  @type target_node: string
7202
  @ivar target_node: If given, the target_node to reallocate the instance to
7203
  @type failover: boolean
7204
  @ivar failover: Whether operation results in failover or migration
7205
  @type fallback: boolean
7206
  @ivar fallback: Whether fallback to failover is allowed if migration not
7207
                  possible
7208
  @type ignore_consistency: boolean
7209
  @ivar ignore_consistency: Whether we should ignore consistency between source
7210
                            and target node
7211
  @type shutdown_timeout: int
7212
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7213

7214
  """
7215

    
7216
  # Constants
7217
  _MIGRATION_POLL_INTERVAL = 1      # seconds
7218
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7219

    
7220
  def __init__(self, lu, instance_name, cleanup=False,
7221
               failover=False, fallback=False,
7222
               ignore_consistency=False,
7223
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7224
    """Initializes this class.
7225

7226
    """
7227
    Tasklet.__init__(self, lu)
7228

    
7229
    # Parameters
7230
    self.instance_name = instance_name
7231
    self.cleanup = cleanup
7232
    self.live = False # will be overridden later
7233
    self.failover = failover
7234
    self.fallback = fallback
7235
    self.ignore_consistency = ignore_consistency
7236
    self.shutdown_timeout = shutdown_timeout
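  # Illustrative sketch (an assumption, not the definitive wiring): the
  # migration and failover LUs are expected to instantiate this tasklet
  # roughly as follows, e.g. from their ExpandNames:
  #
  #   self._migrater = TLMigrateInstance(self, self.op.instance_name,
  #                                      cleanup=self.op.cleanup,
  #                                      failover=False,
  #                                      fallback=self.op.allow_failover)
  #   self.tasklets = [self._migrater]
  #
  # The opcode attribute names used above (cleanup, allow_failover) are
  # assumptions made only for the sake of this example.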
7237

    
7238
  def CheckPrereq(self):
7239
    """Check prerequisites.
7240

7241
    This checks that the instance is in the cluster.
7242

7243
    """
7244
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7245
    instance = self.cfg.GetInstanceInfo(instance_name)
7246
    assert instance is not None
7247
    self.instance = instance
7248

    
7249
    if (not self.cleanup and not instance.admin_up and not self.failover and
7250
        self.fallback):
7251
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7252
                      " to failover")
7253
      self.failover = True
7254

    
7255
    if instance.disk_template not in constants.DTS_MIRRORED:
7256
      if self.failover:
7257
        text = "failovers"
7258
      else:
7259
        text = "migrations"
7260
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7261
                                 " %s" % (instance.disk_template, text),
7262
                                 errors.ECODE_STATE)
7263

    
7264
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7265
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7266

    
7267
      if self.lu.op.iallocator:
7268
        self._RunAllocator()
7269
      else:
7270
        # We set self.target_node as it is required by
7271
        # BuildHooksEnv
7272
        self.target_node = self.lu.op.target_node
7273

    
7274
      # self.target_node is already populated, either directly or by the
7275
      # iallocator run
7276
      target_node = self.target_node
7277
      if self.target_node == instance.primary_node:
7278
        raise errors.OpPrereqError("Cannot migrate instance %s"
7279
                                   " to its primary (%s)" %
7280
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_STATE)
7281

    
7282
      if len(self.lu.tasklets) == 1:
7283
        # It is safe to release locks only when we're the only tasklet
7284
        # in the LU
7285
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7286
                      keep=[instance.primary_node, self.target_node])
7287

    
7288
    else:
7289
      secondary_nodes = instance.secondary_nodes
7290
      if not secondary_nodes:
7291
        raise errors.ConfigurationError("No secondary node but using"
7292
                                        " %s disk template" %
7293
                                        instance.disk_template)
7294
      target_node = secondary_nodes[0]
7295
      if self.lu.op.iallocator or (self.lu.op.target_node and
7296
                                   self.lu.op.target_node != target_node):
7297
        if self.failover:
7298
          text = "failed over"
7299
        else:
7300
          text = "migrated"
7301
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7302
                                   " be %s to arbitrary nodes"
7303
                                   " (neither an iallocator nor a target"
7304
                                   " node can be passed)" %
7305
                                   (instance.disk_template, text),
7306
                                   errors.ECODE_INVAL)
7307

    
7308
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7309

    
7310
    # check memory requirements on the target node
7311
    if not self.failover or instance.admin_up:
7312
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7313
                           instance.name, i_be[constants.BE_MEMORY],
7314
                           instance.hypervisor)
7315
    else:
7316
      self.lu.LogInfo("Not checking memory on the secondary node as"
7317
                      " instance will not be started")
7318

    
7319
    # check bridge existence
7320
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7321

    
7322
    if not self.cleanup:
7323
      _CheckNodeNotDrained(self.lu, target_node)
7324
      if not self.failover:
7325
        result = self.rpc.call_instance_migratable(instance.primary_node,
7326
                                                   instance)
7327
        if result.fail_msg and self.fallback:
7328
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7329
                          " failover")
7330
          self.failover = True
7331
        else:
7332
          result.Raise("Can't migrate, please use failover",
7333
                       prereq=True, ecode=errors.ECODE_STATE)
7334

    
7335
    assert not (self.failover and self.cleanup)
7336

    
7337
    if not self.failover:
7338
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7339
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7340
                                   " parameters are accepted",
7341
                                   errors.ECODE_INVAL)
7342
      if self.lu.op.live is not None:
7343
        if self.lu.op.live:
7344
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7345
        else:
7346
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7347
        # reset the 'live' parameter to None so that repeated
7348
        # invocations of CheckPrereq do not raise an exception
7349
        self.lu.op.live = None
7350
      elif self.lu.op.mode is None:
7351
        # read the default value from the hypervisor
7352
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7353
                                                skip_globals=False)
7354
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7355

    
7356
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7357
    else:
7358
      # Failover is never live
7359
      self.live = False
7360

    
7361
  def _RunAllocator(self):
7362
    """Run the allocator based on input opcode.
7363

7364
    """
7365
    ial = IAllocator(self.cfg, self.rpc,
7366
                     mode=constants.IALLOCATOR_MODE_RELOC,
7367
                     name=self.instance_name,
7368
                     # TODO See why hail breaks with a single node below
7369
                     relocate_from=[self.instance.primary_node,
7370
                                    self.instance.primary_node],
7371
                     )
7372

    
7373
    ial.Run(self.lu.op.iallocator)
7374

    
7375
    if not ial.success:
7376
      raise errors.OpPrereqError("Can't compute nodes using"
7377
                                 " iallocator '%s': %s" %
7378
                                 (self.lu.op.iallocator, ial.info),
7379
                                 errors.ECODE_NORES)
7380
    if len(ial.result) != ial.required_nodes:
7381
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7382
                                 " of nodes (%s), required %s" %
7383
                                 (self.lu.op.iallocator, len(ial.result),
7384
                                  ial.required_nodes), errors.ECODE_FAULT)
7385
    self.target_node = ial.result[0]
7386
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7387
                    self.instance_name, self.lu.op.iallocator,
7388
                    utils.CommaJoin(ial.result))
7389

    
7390
  def _WaitUntilSync(self):
7391
    """Poll with custom rpc for disk sync.
7392

7393
    This uses our own step-based rpc call.
7394

7395
    """
7396
    self.feedback_fn("* wait until resync is done")
7397
    all_done = False
7398
    while not all_done:
7399
      all_done = True
7400
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7401
                                            self.nodes_ip,
7402
                                            self.instance.disks)
7403
      min_percent = 100
7404
      for node, nres in result.items():
7405
        nres.Raise("Cannot resync disks on node %s" % node)
7406
        node_done, node_percent = nres.payload
7407
        all_done = all_done and node_done
7408
        if node_percent is not None:
7409
          min_percent = min(min_percent, node_percent)
7410
      if not all_done:
7411
        if min_percent < 100:
7412
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7413
        time.sleep(2)
7414

    
7415
  def _EnsureSecondary(self, node):
7416
    """Demote a node to secondary.
7417

7418
    """
7419
    self.feedback_fn("* switching node %s to secondary mode" % node)
7420

    
7421
    for dev in self.instance.disks:
7422
      self.cfg.SetDiskID(dev, node)
7423

    
7424
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7425
                                          self.instance.disks)
7426
    result.Raise("Cannot change disk to secondary on node %s" % node)
7427

    
7428
  def _GoStandalone(self):
7429
    """Disconnect from the network.
7430

7431
    """
7432
    self.feedback_fn("* changing into standalone mode")
7433
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7434
                                               self.instance.disks)
7435
    for node, nres in result.items():
7436
      nres.Raise("Cannot disconnect disks node %s" % node)
7437

    
7438
  def _GoReconnect(self, multimaster):
7439
    """Reconnect to the network.
7440

7441
    """
7442
    if multimaster:
7443
      msg = "dual-master"
7444
    else:
7445
      msg = "single-master"
7446
    self.feedback_fn("* changing disks into %s mode" % msg)
7447
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7448
                                           self.instance.disks,
7449
                                           self.instance.name, multimaster)
7450
    for node, nres in result.items():
7451
      nres.Raise("Cannot change disks config on node %s" % node)
7452

    
7453
  def _ExecCleanup(self):
7454
    """Try to cleanup after a failed migration.
7455

7456
    The cleanup is done by:
7457
      - check that the instance is running only on one node
7458
        (and update the config if needed)
7459
      - change disks on its secondary node to secondary
7460
      - wait until disks are fully synchronized
7461
      - disconnect from the network
7462
      - change disks into single-master mode
7463
      - wait again until disks are fully synchronized
7464

7465
    """
7466
    instance = self.instance
7467
    target_node = self.target_node
7468
    source_node = self.source_node
7469

    
7470
    # check running on only one node
7471
    self.feedback_fn("* checking where the instance actually runs"
7472
                     " (if this hangs, the hypervisor might be in"
7473
                     " a bad state)")
7474
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7475
    for node, result in ins_l.items():
7476
      result.Raise("Can't contact node %s" % node)
7477

    
7478
    runningon_source = instance.name in ins_l[source_node].payload
7479
    runningon_target = instance.name in ins_l[target_node].payload
7480

    
7481
    if runningon_source and runningon_target:
7482
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7483
                               " or the hypervisor is confused; you will have"
7484
                               " to ensure manually that it runs only on one"
7485
                               " and restart this operation")
7486

    
7487
    if not (runningon_source or runningon_target):
7488
      raise errors.OpExecError("Instance does not seem to be running at all;"
7489
                               " in this case it's safer to repair by"
7490
                               " running 'gnt-instance stop' to ensure disk"
7491
                               " shutdown, and then restarting it")
7492

    
7493
    if runningon_target:
7494
      # the migration has actually succeeded, we need to update the config
7495
      self.feedback_fn("* instance running on secondary node (%s),"
7496
                       " updating config" % target_node)
7497
      instance.primary_node = target_node
7498
      self.cfg.Update(instance, self.feedback_fn)
7499
      demoted_node = source_node
7500
    else:
7501
      self.feedback_fn("* instance confirmed to be running on its"
7502
                       " primary node (%s)" % source_node)
7503
      demoted_node = target_node
7504

    
7505
    if instance.disk_template in constants.DTS_INT_MIRROR:
7506
      self._EnsureSecondary(demoted_node)
7507
      try:
7508
        self._WaitUntilSync()
7509
      except errors.OpExecError:
7510
        # we ignore errors here, since if the device is standalone, it
7511
        # won't be able to sync
7512
        pass
7513
      self._GoStandalone()
7514
      self._GoReconnect(False)
7515
      self._WaitUntilSync()
7516

    
7517
    self.feedback_fn("* done")
7518

    
7519
  def _RevertDiskStatus(self):
7520
    """Try to revert the disk status after a failed migration.
7521

7522
    """
7523
    target_node = self.target_node
7524
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7525
      return
7526

    
7527
    try:
7528
      self._EnsureSecondary(target_node)
7529
      self._GoStandalone()
7530
      self._GoReconnect(False)
7531
      self._WaitUntilSync()
7532
    except errors.OpExecError, err:
7533
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7534
                         " please try to recover the instance manually;"
7535
                         " error '%s'" % str(err))
7536

    
7537
  def _AbortMigration(self):
7538
    """Call the hypervisor code to abort a started migration.
7539

7540
    """
7541
    instance = self.instance
7542
    target_node = self.target_node
7543
    source_node = self.source_node
7544
    migration_info = self.migration_info
7545

    
7546
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7547
                                                                 instance,
7548
                                                                 migration_info,
7549
                                                                 False)
7550
    abort_msg = abort_result.fail_msg
7551
    if abort_msg:
7552
      logging.error("Aborting migration failed on target node %s: %s",
7553
                    target_node, abort_msg)
7554
      # Don't raise an exception here, as we still have to try to revert the
7555
      # disk status, even if this step failed.
7556

    
7557
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7558
        instance, False, self.live)
7559
    abort_msg = abort_result.fail_msg
7560
    if abort_msg:
7561
      logging.error("Aborting migration failed on source node %s: %s",
7562
                    source_node, abort_msg)
7563

    
7564
  def _ExecMigration(self):
7565
    """Migrate an instance.
7566

7567
    The migrate is done by:
7568
      - change the disks into dual-master mode
7569
      - wait until disks are fully synchronized again
7570
      - migrate the instance
7571
      - change disks on the new secondary node (the old primary) to secondary
7572
      - wait until disks are fully synchronized
7573
      - change disks into single-master mode
7574

7575
    """
7576
    instance = self.instance
7577
    target_node = self.target_node
7578
    source_node = self.source_node
7579

    
7580
    # Check for hypervisor version mismatch and warn the user.
7581
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7582
                                       None, self.instance.hypervisor)
7583
    src_info = nodeinfo[source_node]
7584
    dst_info = nodeinfo[target_node]
7585

    
7586
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7587
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7588
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7589
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7590
      if src_version != dst_version:
7591
        self.feedback_fn("* warning: hypervisor version mismatch between"
7592
                         " source (%s) and target (%s) node" %
7593
                         (src_version, dst_version))
7594

    
7595
    self.feedback_fn("* checking disk consistency between source and target")
7596
    for dev in instance.disks:
7597
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7598
        raise errors.OpExecError("Disk %s is degraded or not fully"
7599
                                 " synchronized on target node,"
7600
                                 " aborting migration" % dev.iv_name)
7601

    
7602
    # First get the migration information from the remote node
7603
    result = self.rpc.call_migration_info(source_node, instance)
7604
    msg = result.fail_msg
7605
    if msg:
7606
      log_err = ("Failed fetching source migration information from %s: %s" %
7607
                 (source_node, msg))
7608
      logging.error(log_err)
7609
      raise errors.OpExecError(log_err)
7610

    
7611
    self.migration_info = migration_info = result.payload
7612

    
7613
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7614
      # Then switch the disks to master/master mode
7615
      self._EnsureSecondary(target_node)
7616
      self._GoStandalone()
7617
      self._GoReconnect(True)
7618
      self._WaitUntilSync()
7619

    
7620
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7621
    result = self.rpc.call_accept_instance(target_node,
7622
                                           instance,
7623
                                           migration_info,
7624
                                           self.nodes_ip[target_node])
7625

    
7626
    msg = result.fail_msg
7627
    if msg:
7628
      logging.error("Instance pre-migration failed, trying to revert"
7629
                    " disk status: %s", msg)
7630
      self.feedback_fn("Pre-migration failed, aborting")
7631
      self._AbortMigration()
7632
      self._RevertDiskStatus()
7633
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7634
                               (instance.name, msg))
7635

    
7636
    self.feedback_fn("* migrating instance to %s" % target_node)
7637
    result = self.rpc.call_instance_migrate(source_node, instance,
7638
                                            self.nodes_ip[target_node],
7639
                                            self.live)
7640
    msg = result.fail_msg
7641
    if msg:
7642
      logging.error("Instance migration failed, trying to revert"
7643
                    " disk status: %s", msg)
7644
      self.feedback_fn("Migration failed, aborting")
7645
      self._AbortMigration()
7646
      self._RevertDiskStatus()
7647
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7648
                               (instance.name, msg))
7649

    
7650
    self.feedback_fn("* starting memory transfer")
7651
    last_feedback = time.time()
7652
    while True:
7653
      result = self.rpc.call_instance_get_migration_status(source_node,
7654
                                                           instance)
7655
      msg = result.fail_msg
7656
      ms = result.payload   # MigrationStatus instance
7657
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7658
        logging.error("Instance migration failed, trying to revert"
7659
                      " disk status: %s", msg)
7660
        self.feedback_fn("Migration failed, aborting")
7661
        self._AbortMigration()
7662
        self._RevertDiskStatus()
7663
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7664
                                 (instance.name, msg))
7665

    
7666
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7667
        self.feedback_fn("* memory transfer complete")
7668
        break
7669

    
7670
      if (utils.TimeoutExpired(last_feedback,
7671
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7672
          ms.transferred_ram is not None):
7673
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7674
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7675
        last_feedback = time.time()
7676

    
7677
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7678

    
7679
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7680
                                                           instance,
7681
                                                           True,
7682
                                                           self.live)
7683
    msg = result.fail_msg
7684
    if msg:
7685
      logging.error("Instance migration succeeded, but finalization failed"
7686
                    " on the source node: %s", msg)
7687
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7688
                               msg)
7689

    
7690
    instance.primary_node = target_node
7691

    
7692
    # distribute new instance config to the other nodes
7693
    self.cfg.Update(instance, self.feedback_fn)
7694

    
7695
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7696
                                                           instance,
7697
                                                           migration_info,
7698
                                                           True)
7699
    msg = result.fail_msg
7700
    if msg:
7701
      logging.error("Instance migration succeeded, but finalization failed"
7702
                    " on the target node: %s", msg)
7703
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7704
                               msg)
7705

    
7706
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7707
      self._EnsureSecondary(source_node)
7708
      self._WaitUntilSync()
7709
      self._GoStandalone()
7710
      self._GoReconnect(False)
7711
      self._WaitUntilSync()
7712

    
7713
    self.feedback_fn("* done")
7714

    
7715
  def _ExecFailover(self):
7716
    """Failover an instance.
7717

7718
    The failover is done by shutting it down on its present node and
7719
    starting it on the secondary.
7720

7721
    """
7722
    instance = self.instance
7723
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7724

    
7725
    source_node = instance.primary_node
7726
    target_node = self.target_node
7727

    
7728
    if instance.admin_up:
7729
      self.feedback_fn("* checking disk consistency between source and target")
7730
      for dev in instance.disks:
7731
        # for DRBD, these are DRBD over LVM devices
7732
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7733
          if primary_node.offline:
7734
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7735
                             " target node %s" %
7736
                             (primary_node.name, dev.iv_name, target_node))
7737
          elif not self.ignore_consistency:
7738
            raise errors.OpExecError("Disk %s is degraded on target node,"
7739
                                     " aborting failover" % dev.iv_name)
7740
    else:
7741
      self.feedback_fn("* not checking disk consistency as instance is not"
7742
                       " running")
7743

    
7744
    self.feedback_fn("* shutting down instance on source node")
7745
    logging.info("Shutting down instance %s on node %s",
7746
                 instance.name, source_node)
7747

    
7748
    result = self.rpc.call_instance_shutdown(source_node, instance,
7749
                                             self.shutdown_timeout)
7750
    msg = result.fail_msg
7751
    if msg:
7752
      if self.ignore_consistency or primary_node.offline:
7753
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7754
                           " proceeding anyway; please make sure node"
7755
                           " %s is down; error details: %s",
7756
                           instance.name, source_node, source_node, msg)
7757
      else:
7758
        raise errors.OpExecError("Could not shutdown instance %s on"
7759
                                 " node %s: %s" %
7760
                                 (instance.name, source_node, msg))
7761

    
7762
    self.feedback_fn("* deactivating the instance's disks on source node")
7763
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7764
      raise errors.OpExecError("Can't shut down the instance's disks")
7765

    
7766
    instance.primary_node = target_node
7767
    # distribute new instance config to the other nodes
7768
    self.cfg.Update(instance, self.feedback_fn)
7769

    
7770
    # Only start the instance if it's marked as up
7771
    if instance.admin_up:
7772
      self.feedback_fn("* activating the instance's disks on target node %s" %
7773
                       target_node)
7774
      logging.info("Starting instance %s on node %s",
7775
                   instance.name, target_node)
7776

    
7777
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7778
                                           ignore_secondaries=True)
7779
      if not disks_ok:
7780
        _ShutdownInstanceDisks(self.lu, instance)
7781
        raise errors.OpExecError("Can't activate the instance's disks")
7782

    
7783
      self.feedback_fn("* starting the instance on the target node %s" %
7784
                       target_node)
7785
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
7786
                                            False)
7787
      msg = result.fail_msg
7788
      if msg:
7789
        _ShutdownInstanceDisks(self.lu, instance)
7790
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7791
                                 (instance.name, target_node, msg))
7792

    
7793
  def Exec(self, feedback_fn):
7794
    """Perform the migration.
7795

7796
    """
7797
    self.feedback_fn = feedback_fn
7798
    self.source_node = self.instance.primary_node
7799

    
7800
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7801
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7802
      self.target_node = self.instance.secondary_nodes[0]
7803
      # Otherwise self.target_node has been populated either
7804
      # directly, or through an iallocator.
7805

    
7806
    self.all_nodes = [self.source_node, self.target_node]
7807
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7808
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7809

    
7810
    if self.failover:
7811
      feedback_fn("Failover instance %s" % self.instance.name)
7812
      self._ExecFailover()
7813
    else:
7814
      feedback_fn("Migrating instance %s" % self.instance.name)
7815

    
7816
      if self.cleanup:
7817
        return self._ExecCleanup()
7818
      else:
7819
        return self._ExecMigration()
7820

    
7821

    
7822
def _CreateBlockDev(lu, node, instance, device, force_create,
7823
                    info, force_open):
7824
  """Create a tree of block devices on a given node.
7825

7826
  If this device type has to be created on secondaries, create it and
7827
  all its children.
7828

7829
  If not, just recurse to children keeping the same 'force' value.
7830

7831
  @param lu: the lu on whose behalf we execute
7832
  @param node: the node on which to create the device
7833
  @type instance: L{objects.Instance}
7834
  @param instance: the instance which owns the device
7835
  @type device: L{objects.Disk}
7836
  @param device: the device to create
7837
  @type force_create: boolean
7838
  @param force_create: whether to force creation of this device; this
7839
      will be changed to True whenever we find a device which has
7840
      CreateOnSecondary() attribute
7841
  @param info: the extra 'metadata' we should attach to the device
7842
      (this will be represented as a LVM tag)
7843
  @type force_open: boolean
7844
  @param force_open: this parameter will be passed to the
7845
      L{backend.BlockdevCreate} function where it specifies
7846
      whether we run on primary or not, and it affects both
7847
      the child assembly and the device's own Open() execution
7848

7849
  """
7850
  if device.CreateOnSecondary():
7851
    force_create = True
7852

    
7853
  if device.children:
7854
    for child in device.children:
7855
      _CreateBlockDev(lu, node, instance, child, force_create,
7856
                      info, force_open)
7857

    
7858
  if not force_create:
7859
    return
7860

    
7861
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7862

    
7863

    
7864
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7865
  """Create a single block device on a given node.
7866

7867
  This will not recurse over children of the device, so they must be
7868
  created in advance.
7869

7870
  @param lu: the lu on whose behalf we execute
7871
  @param node: the node on which to create the device
7872
  @type instance: L{objects.Instance}
7873
  @param instance: the instance which owns the device
7874
  @type device: L{objects.Disk}
7875
  @param device: the device to create
7876
  @param info: the extra 'metadata' we should attach to the device
7877
      (this will be represented as a LVM tag)
7878
  @type force_open: boolean
7879
  @param force_open: this parameter will be passed to the
7880
      L{backend.BlockdevCreate} function where it specifies
7881
      whether we run on primary or not, and it affects both
7882
      the child assembly and the device's own Open() execution
7883

7884
  """
7885
  lu.cfg.SetDiskID(device, node)
7886
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7887
                                       instance.name, force_open, info)
7888
  result.Raise("Can't create block device %s on"
7889
               " node %s for instance %s" % (device, node, instance.name))
7890
  if device.physical_id is None:
7891
    device.physical_id = result.payload
7892

    
7893

    
7894
def _GenerateUniqueNames(lu, exts):
7895
  """Generate a suitable LV name.
7896

7897
  This generates a unique logical volume name for each requested extension.
7898

7899
  """
7900
  results = []
7901
  for val in exts:
7902
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7903
    results.append("%s%s" % (new_id, val))
7904
  return results
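# Illustrative example: _GenerateUniqueNames(lu, [".disk0", ".disk1"]) returns
# something like ["d2ed1b4e-....disk0", "7f3a9c21-....disk1"], i.e. one
# cluster-unique ID per requested extension, with the extension appended.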
7905

    
7906

    
7907
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7908
                         iv_name, p_minor, s_minor):
7909
  """Generate a drbd8 device complete with its children.
7910

7911
  """
7912
  assert len(vgnames) == len(names) == 2
7913
  port = lu.cfg.AllocatePort()
7914
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7915
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7916
                          logical_id=(vgnames[0], names[0]))
7917
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
7918
                          logical_id=(vgnames[1], names[1]))
7919
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7920
                          logical_id=(primary, secondary, port,
7921
                                      p_minor, s_minor,
7922
                                      shared_secret),
7923
                          children=[dev_data, dev_meta],
7924
                          iv_name=iv_name)
7925
  return drbd_dev
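# Illustrative result (sketch): for a 10240 MiB disk this builds a tree like
#
#   Disk(LD_DRBD8, size=10240,
#        logical_id=(primary, secondary, port, p_minor, s_minor, secret),
#        children=[Disk(LD_LV, size=10240,
#                       logical_id=(vgnames[0], names[0])),
#                  Disk(LD_LV, size=DRBD_META_SIZE,
#                       logical_id=(vgnames[1], names[1]))],
#        iv_name=iv_name)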
7926

    
7927

    
7928
def _GenerateDiskTemplate(lu, template_name,
7929
                          instance_name, primary_node,
7930
                          secondary_nodes, disk_info,
7931
                          file_storage_dir, file_driver,
7932
                          base_index, feedback_fn):
7933
  """Generate the entire disk layout for a given template type.
7934

7935
  """
7936
  #TODO: compute space requirements
7937

    
7938
  vgname = lu.cfg.GetVGName()
7939
  disk_count = len(disk_info)
7940
  disks = []
7941
  if template_name == constants.DT_DISKLESS:
7942
    pass
7943
  elif template_name == constants.DT_PLAIN:
7944
    if len(secondary_nodes) != 0:
7945
      raise errors.ProgrammerError("Wrong template configuration")
7946

    
7947
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7948
                                      for i in range(disk_count)])
7949
    for idx, disk in enumerate(disk_info):
7950
      disk_index = idx + base_index
7951
      vg = disk.get(constants.IDISK_VG, vgname)
7952
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7953
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7954
                              size=disk[constants.IDISK_SIZE],
7955
                              logical_id=(vg, names[idx]),
7956
                              iv_name="disk/%d" % disk_index,
7957
                              mode=disk[constants.IDISK_MODE])
7958
      disks.append(disk_dev)
7959
  elif template_name == constants.DT_DRBD8:
7960
    if len(secondary_nodes) != 1:
7961
      raise errors.ProgrammerError("Wrong template configuration")
7962
    remote_node = secondary_nodes[0]
7963
    minors = lu.cfg.AllocateDRBDMinor(
7964
      [primary_node, remote_node] * len(disk_info), instance_name)
7965

    
7966
    names = []
7967
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7968
                                               for i in range(disk_count)]):
7969
      names.append(lv_prefix + "_data")
7970
      names.append(lv_prefix + "_meta")
7971
    for idx, disk in enumerate(disk_info):
7972
      disk_index = idx + base_index
7973
      data_vg = disk.get(constants.IDISK_VG, vgname)
7974
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7975
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7976
                                      disk[constants.IDISK_SIZE],
7977
                                      [data_vg, meta_vg],
7978
                                      names[idx * 2:idx * 2 + 2],
7979
                                      "disk/%d" % disk_index,
7980
                                      minors[idx * 2], minors[idx * 2 + 1])
7981
      disk_dev.mode = disk[constants.IDISK_MODE]
7982
      disks.append(disk_dev)
7983
  elif template_name == constants.DT_FILE:
7984
    if len(secondary_nodes) != 0:
7985
      raise errors.ProgrammerError("Wrong template configuration")
7986

    
7987
    opcodes.RequireFileStorage()
7988

    
7989
    for idx, disk in enumerate(disk_info):
7990
      disk_index = idx + base_index
7991
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7992
                              size=disk[constants.IDISK_SIZE],
7993
                              iv_name="disk/%d" % disk_index,
7994
                              logical_id=(file_driver,
7995
                                          "%s/disk%d" % (file_storage_dir,
7996
                                                         disk_index)),
7997
                              mode=disk[constants.IDISK_MODE])
7998
      disks.append(disk_dev)
7999
  elif template_name == constants.DT_SHARED_FILE:
8000
    if len(secondary_nodes) != 0:
8001
      raise errors.ProgrammerError("Wrong template configuration")
8002

    
8003
    opcodes.RequireSharedFileStorage()
8004

    
8005
    for idx, disk in enumerate(disk_info):
8006
      disk_index = idx + base_index
8007
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8008
                              size=disk[constants.IDISK_SIZE],
8009
                              iv_name="disk/%d" % disk_index,
8010
                              logical_id=(file_driver,
8011
                                          "%s/disk%d" % (file_storage_dir,
8012
                                                         disk_index)),
8013
                              mode=disk[constants.IDISK_MODE])
8014
      disks.append(disk_dev)
8015
  elif template_name == constants.DT_BLOCK:
8016
    if len(secondary_nodes) != 0:
8017
      raise errors.ProgrammerError("Wrong template configuration")
8018

    
8019
    for idx, disk in enumerate(disk_info):
8020
      disk_index = idx + base_index
8021
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8022
                              size=disk[constants.IDISK_SIZE],
8023
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8024
                                          disk[constants.IDISK_ADOPT]),
8025
                              iv_name="disk/%d" % disk_index,
8026
                              mode=disk[constants.IDISK_MODE])
8027
      disks.append(disk_dev)
8028

    
8029
  else:
8030
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8031
  return disks
8032

    
8033

    
8034
def _GetInstanceInfoText(instance):
8035
  """Compute that text that should be added to the disk's metadata.
8036

8037
  """
8038
  return "originstname+%s" % instance.name
8039

    
8040

    
8041
def _CalcEta(time_taken, written, total_size):
8042
  """Calculates the ETA based on size written and total size.
8043

8044
  @param time_taken: The time taken so far, in seconds
8045
  @param written: amount written so far
8046
  @param total_size: The total size of data to be written
8047
  @return: The remaining time in seconds
8048

8049
  """
8050
  avg_time = time_taken / float(written)
8051
  return (total_size - written) * avg_time
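# Worked example: if 1024 MiB out of 4096 MiB were written in 30 seconds,
# avg_time is 30 / 1024.0 s/MiB and the ETA is (4096 - 1024) * 30 / 1024.0,
# i.e. 90 seconds.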
8052

    
8053

    
8054
def _WipeDisks(lu, instance):
8055
  """Wipes instance disks.
8056

8057
  @type lu: L{LogicalUnit}
8058
  @param lu: the logical unit on whose behalf we execute
8059
  @type instance: L{objects.Instance}
8060
  @param instance: the instance whose disks we should wipe
8061
  @return: the success of the wipe
8062

8063
  """
8064
  node = instance.primary_node
8065

    
8066
  for device in instance.disks:
8067
    lu.cfg.SetDiskID(device, node)
8068

    
8069
  logging.info("Pause sync of instance %s disks", instance.name)
8070
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8071

    
8072
  for idx, success in enumerate(result.payload):
8073
    if not success:
8074
      logging.warn("pause-sync of instance %s for disks %d failed",
8075
                   instance.name, idx)
8076

    
8077
  try:
8078
    for idx, device in enumerate(instance.disks):
8079
      # The wipe chunk size is MIN_WIPE_CHUNK_PERCENT % of the instance disk
8080
      # size, but at most MAX_WIPE_CHUNK
8081
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8082
                            constants.MIN_WIPE_CHUNK_PERCENT)
8083
      # we _must_ make this an int, otherwise rounding errors will
8084
      # occur
8085
      wipe_chunk_size = int(wipe_chunk_size)
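      # Worked example (assuming, purely for illustration, that
      # MIN_WIPE_CHUNK_PERCENT == 10 and MAX_WIPE_CHUNK == 1024 MiB): a
      # 4096 MiB disk gets int(min(1024, 4096 / 100.0 * 10)) == 409 MiB
      # chunks, while a 100 GiB disk is capped at 1024 MiB chunks.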
8086

    
8087
      lu.LogInfo("* Wiping disk %d", idx)
8088
      logging.info("Wiping disk %d for instance %s, node %s using"
8089
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8090

    
8091
      offset = 0
8092
      size = device.size
8093
      last_output = 0
8094
      start_time = time.time()
8095

    
8096
      while offset < size:
8097
        wipe_size = min(wipe_chunk_size, size - offset)
8098
        logging.debug("Wiping disk %d, offset %s, chunk %s",
8099
                      idx, offset, wipe_size)
8100
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8101
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
8102
                     (idx, offset, wipe_size))
8103
        now = time.time()
8104
        offset += wipe_size
8105
        if now - last_output >= 60:
8106
          eta = _CalcEta(now - start_time, offset, size)
8107
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
8108
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
8109
          last_output = now
8110
  finally:
8111
    logging.info("Resume sync of instance %s disks", instance.name)
8112

    
8113
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8114

    
8115
    for idx, success in enumerate(result.payload):
8116
      if not success:
8117
        lu.LogWarning("Resume sync of disk %d failed, please have a"
8118
                      " look at the status and troubleshoot the issue", idx)
8119
        logging.warn("resume-sync of instance %s for disks %d failed",
8120
                     instance.name, idx)
8121

    
8122

    
8123
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8124
  """Create all disks for an instance.
8125

8126
  This abstracts away some work from AddInstance.
8127

8128
  @type lu: L{LogicalUnit}
8129
  @param lu: the logical unit on whose behalf we execute
8130
  @type instance: L{objects.Instance}
8131
  @param instance: the instance whose disks we should create
8132
  @type to_skip: list
8133
  @param to_skip: list of indices to skip
8134
  @type target_node: string
8135
  @param target_node: if passed, overrides the target node for creation
8136
  @rtype: boolean
8137
  @return: the success of the creation
8138

8139
  """
8140
  info = _GetInstanceInfoText(instance)
8141
  if target_node is None:
8142
    pnode = instance.primary_node
8143
    all_nodes = instance.all_nodes
8144
  else:
8145
    pnode = target_node
8146
    all_nodes = [pnode]
8147

    
8148
  if instance.disk_template in constants.DTS_FILEBASED:
8149
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8150
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8151

    
8152
    result.Raise("Failed to create directory '%s' on"
8153
                 " node %s" % (file_storage_dir, pnode))
8154

    
8155
  # Note: this needs to be kept in sync with adding of disks in
8156
  # LUInstanceSetParams
8157
  for idx, device in enumerate(instance.disks):
8158
    if to_skip and idx in to_skip:
8159
      continue
8160
    logging.info("Creating volume %s for instance %s",
8161
                 device.iv_name, instance.name)
8162
    #HARDCODE
8163
    for node in all_nodes:
8164
      f_create = node == pnode
8165
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8166

    
8167

    
8168
def _RemoveDisks(lu, instance, target_node=None):
8169
  """Remove all disks for an instance.
8170

8171
  This abstracts away some work from `AddInstance()` and
8172
  `RemoveInstance()`. Note that in case some of the devices couldn't
8173
  be removed, the removal will continue with the other ones (compare
8174
  with `_CreateDisks()`).
8175

8176
  @type lu: L{LogicalUnit}
8177
  @param lu: the logical unit on whose behalf we execute
8178
  @type instance: L{objects.Instance}
8179
  @param instance: the instance whose disks we should remove
8180
  @type target_node: string
8181
  @param target_node: used to override the node on which to remove the disks
8182
  @rtype: boolean
8183
  @return: the success of the removal
8184

8185
  """
8186
  logging.info("Removing block devices for instance %s", instance.name)
8187

    
8188
  all_result = True
8189
  for device in instance.disks:
8190
    if target_node:
8191
      edata = [(target_node, device)]
8192
    else:
8193
      edata = device.ComputeNodeTree(instance.primary_node)
8194
    for node, disk in edata:
8195
      lu.cfg.SetDiskID(disk, node)
8196
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8197
      if msg:
8198
        lu.LogWarning("Could not remove block device %s on node %s,"
8199
                      " continuing anyway: %s", device.iv_name, node, msg)
8200
        all_result = False
8201

    
8202
  if instance.disk_template == constants.DT_FILE:
8203
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8204
    if target_node:
8205
      tgt = target_node
8206
    else:
8207
      tgt = instance.primary_node
8208
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8209
    if result.fail_msg:
8210
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8211
                    file_storage_dir, instance.primary_node, result.fail_msg)
8212
      all_result = False
8213

    
8214
  return all_result
8215

    
8216

    
8217
def _ComputeDiskSizePerVG(disk_template, disks):
8218
  """Compute disk size requirements in the volume group
8219

8220
  """
8221
  def _compute(disks, payload):
8222
    """Universal algorithm.
8223

8224
    """
8225
    vgs = {}
8226
    for disk in disks:
8227
      vgs[disk[constants.IDISK_VG]] = \
8228
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload
8229

    
8230
    return vgs
8231

    
8232
  # Required free disk space per volume group, as a function of disk sizes
8233
  req_size_dict = {
8234
    constants.DT_DISKLESS: {},
8235
    constants.DT_PLAIN: _compute(disks, 0),
8236
    # 128 MB are added for drbd metadata for each disk
8237
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8238
    constants.DT_FILE: {},
8239
    constants.DT_SHARED_FILE: {},
8240
  }
8241

    
8242
  if disk_template not in req_size_dict:
8243
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8244
                                 " is unknown" % disk_template)
8245

    
8246
  return req_size_dict[disk_template]
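# Worked example: for disk_template == constants.DT_DRBD8 and two disks of
# 1024 MiB and 2048 MiB in the same volume group "xenvg", the result is
# {"xenvg": (1024 + DRBD_META_SIZE) + (2048 + DRBD_META_SIZE)}, i.e.
# {"xenvg": 3328} MiB.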
8247

    
8248

    
8249
def _ComputeDiskSize(disk_template, disks):
8250
  """Compute disk size requirements in the volume group
8251

8252
  """
8253
  # Required free disk space as a function of the disk sizes
8254
  req_size_dict = {
8255
    constants.DT_DISKLESS: None,
8256
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8257
    # 128 MB are added for drbd metadata for each disk
8258
    constants.DT_DRBD8:
8259
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8260
    constants.DT_FILE: None,
8261
    constants.DT_SHARED_FILE: 0,
8262
    constants.DT_BLOCK: 0,
8263
  }
8264

    
8265
  if disk_template not in req_size_dict:
8266
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8267
                                 " is unknown" % disk_template)
8268

    
8269
  return req_size_dict[disk_template]
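# Worked example: two disks of 1024 MiB and 2048 MiB require 3072 MiB with
# DT_PLAIN and 3072 + 2 * DRBD_META_SIZE == 3328 MiB with DT_DRBD8, while the
# file-, shared-file- and block-based templates need no volume group space.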
8270

    
8271

    
8272
def _FilterVmNodes(lu, nodenames):
8273
  """Filters out non-vm_capable nodes from a list.
8274

8275
  @type lu: L{LogicalUnit}
8276
  @param lu: the logical unit for which we check
8277
  @type nodenames: list
8278
  @param nodenames: the list of nodes on which we should check
8279
  @rtype: list
8280
  @return: the list of vm-capable nodes
8281

8282
  """
8283
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8284
  return [name for name in nodenames if name not in non_vm_nodes]
8285

    
8286

    
8287
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8288
  """Hypervisor parameter validation.
8289

8290
  This function abstracts the hypervisor parameter validation to be
8291
  used in both instance create and instance modify.
8292

8293
  @type lu: L{LogicalUnit}
8294
  @param lu: the logical unit for which we check
8295
  @type nodenames: list
8296
  @param nodenames: the list of nodes on which we should check
8297
  @type hvname: string
8298
  @param hvname: the name of the hypervisor we should use
8299
  @type hvparams: dict
8300
  @param hvparams: the parameters which we need to check
8301
  @raise errors.OpPrereqError: if the parameters are not valid
8302

8303
  """
8304
  nodenames = _FilterVmNodes(lu, nodenames)
8305

    
8306
  cluster = lu.cfg.GetClusterInfo()
8307
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8308

    
8309
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8310
  for node in nodenames:
8311
    info = hvinfo[node]
8312
    if info.offline:
8313
      continue
8314
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8315

    
8316

    
8317
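# Sketch of the FillDict step above (example values are assumptions, not
# cluster data): the instance hvparams are overlaid on the cluster-level
# defaults for the hypervisor before being shipped to the nodes, roughly
#
#   objects.FillDict({"kernel_path": "/boot/vmlinuz", "acpi": True},
#                    {"acpi": False})
#   # => {"kernel_path": "/boot/vmlinuz", "acpi": False}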
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

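  # Illustration of the adopt/no-adopt consistency check above (opcode values
  # are made up for the example; "size"/"adopt" stand for constants.IDISK_SIZE
  # and constants.IDISK_ADOPT):
  #
  #   disks=[{"size": 1024, "adopt": "lv0"}, {"size": 512, "adopt": "lv1"}]
  # is accepted, while the mixed form
  #   disks=[{"size": 1024, "adopt": "lv0"}, {"size": 512}]
  # is rejected with "Either all disks are adopted or none is".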
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

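  # Sketch of the allocator reply handled above (node names are assumptions):
  # for a two-node disk template the iallocator is expected to answer with
  # ial.success == True, ial.required_nodes == 2 and something like
  # ial.result == ["node1.example.com", "node2.example.com"], which is then
  # mapped onto self.op.pnode and self.op.snode.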
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

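  # Rough shape of the export file parsed above (layout and values are an
  # assumed, abridged example): a ConfigParser document with an [export]
  # section carrying the version and an [instance] section carrying the
  # instance data, e.g.
  #
  #   [export]
  #   version = 0
  #   [instance]
  #   name = web1.example.com
  #   disk0_size = 10240
  #   nic0_mac = aa:00:00:12:34:56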
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

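  # Example of the "fill but never override" rule above (values are made up):
  # with self.op.beparams == {"memory": 512} and an export backend section
  # providing memory=1024 and vcpus=2, the loop leaves memory at 512 and only
  # adds vcpus=2, so explicit opcode parameters always win over the export.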
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

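  # Example of _RevertToDefaults (numbers are assumptions): with cluster
  # default beparams {"memory": 128, "vcpus": 1} and opcode beparams
  # {"memory": 128, "vcpus": 4}, the method drops "memory" (identical to the
  # default) and keeps "vcpus", so only real overrides end up stored on the
  # instance when identify_defaults is used.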
  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

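  # Path sketch for _CalculateFileStorageDir (paths are assumed examples): a
  # cluster storage dir of "/srv/ganeti/file-storage", an opcode
  # file_storage_dir of "web" and instance name "web1.example.com" would give
  # "/srv/ganeti/file-storage/web/web1.example.com".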
  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
    for param, value in self.op.beparams.iteritems():
      if value == constants.VALUE_AUTO:
        self.op.beparams[param] = default_beparams[param]
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      if link == constants.VALUE_AUTO:
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
          # to ReserveLV use the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

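  # Worked example for the non-adoption disk check in CheckPrereq (numbers are
  # assumptions): a drbd instance with a single 10240 MiB disk in "xenvg"
  # yields req_sizes == {"xenvg": 10240 + DRBD_META_SIZE} == {"xenvg": 10368},
  # and _CheckNodesFreeDiskPerVG then requires at least that much free space
  # in that VG on both the primary and the secondary node.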
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


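# Note on the adoption branch of LUInstanceCreate.Exec above (names are
# illustrative assumptions): an adopted volume such as "xenvg/data-vol" is
# renamed on the primary node to the freshly generated logical id of the new
# disk object by passing (temporary disk, new name) pairs to
# call_blockdev_rename, after which the instance is added to the config.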
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


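# Sketch of the returned structure (field names are an assumption based on
# typical console objects): the dict is a serialized console description,
# e.g. the instance name, the console kind (ssh, vnc, ...) and the host/port
# or command needed to attach, which the CLI turns into the command line
# printed by "gnt-instance console".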
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=list(relocate_from))

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)
9652

    
9653
    if len(instance.secondary_nodes) != 1:
9654
      raise errors.OpPrereqError("The instance has a strange layout,"
9655
                                 " expected one secondary but found %d" %
9656
                                 len(instance.secondary_nodes),
9657
                                 errors.ECODE_FAULT)
9658

    
9659
    if not self.delay_iallocator:
9660
      self._CheckPrereq2()
9661

    
9662
  def _CheckPrereq2(self):
9663
    """Check prerequisites, second part.
9664

9665
    Conceptually this check belongs in CheckPrereq, but it was split out and
    is also called from Exec: during node evacuation the iallocator would
    otherwise only be called with an unmodified cluster model, i.e. without
    the planned changes taken into account.
9669

9670
    """
9671
    instance = self.instance
9672
    secondary_node = instance.secondary_nodes[0]
9673

    
9674
    if self.iallocator_name is None:
9675
      remote_node = self.remote_node
9676
    else:
9677
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9678
                                       instance.name, instance.secondary_nodes)
9679

    
9680
    if remote_node is None:
9681
      self.remote_node_info = None
9682
    else:
9683
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9684
             "Remote node '%s' is not locked" % remote_node
9685

    
9686
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9687
      assert self.remote_node_info is not None, \
9688
        "Cannot retrieve locked node %s" % remote_node
9689

    
9690
    if remote_node == self.instance.primary_node:
9691
      raise errors.OpPrereqError("The specified node is the primary node of"
9692
                                 " the instance", errors.ECODE_INVAL)
9693

    
9694
    if remote_node == secondary_node:
9695
      raise errors.OpPrereqError("The specified node is already the"
9696
                                 " secondary node of the instance",
9697
                                 errors.ECODE_INVAL)
9698

    
9699
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9700
                                    constants.REPLACE_DISK_CHG):
9701
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9702
                                 errors.ECODE_INVAL)
9703

    
9704
    if self.mode == constants.REPLACE_DISK_AUTO:
9705
      if not self._CheckDisksActivated(instance):
9706
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9707
                                   " first" % self.instance_name,
9708
                                   errors.ECODE_STATE)
9709
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9710
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9711

    
9712
      if faulty_primary and faulty_secondary:
9713
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9714
                                   " one node and can not be repaired"
9715
                                   " automatically" % self.instance_name,
9716
                                   errors.ECODE_STATE)
9717

    
9718
      if faulty_primary:
9719
        self.disks = faulty_primary
9720
        self.target_node = instance.primary_node
9721
        self.other_node = secondary_node
9722
        check_nodes = [self.target_node, self.other_node]
9723
      elif faulty_secondary:
9724
        self.disks = faulty_secondary
9725
        self.target_node = secondary_node
9726
        self.other_node = instance.primary_node
9727
        check_nodes = [self.target_node, self.other_node]
9728
      else:
9729
        self.disks = []
9730
        check_nodes = []
9731

    
9732
    else:
9733
      # Non-automatic modes
9734
      if self.mode == constants.REPLACE_DISK_PRI:
9735
        self.target_node = instance.primary_node
9736
        self.other_node = secondary_node
9737
        check_nodes = [self.target_node, self.other_node]
9738

    
9739
      elif self.mode == constants.REPLACE_DISK_SEC:
9740
        self.target_node = secondary_node
9741
        self.other_node = instance.primary_node
9742
        check_nodes = [self.target_node, self.other_node]
9743

    
9744
      elif self.mode == constants.REPLACE_DISK_CHG:
9745
        self.new_node = remote_node
9746
        self.other_node = instance.primary_node
9747
        self.target_node = secondary_node
9748
        check_nodes = [self.new_node, self.other_node]
9749

    
9750
        _CheckNodeNotDrained(self.lu, remote_node)
9751
        _CheckNodeVmCapable(self.lu, remote_node)
9752

    
9753
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9754
        assert old_node_info is not None
9755
        if old_node_info.offline and not self.early_release:
9756
          # doesn't make sense to delay the release
9757
          self.early_release = True
9758
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9759
                          " early-release mode", secondary_node)
9760

    
9761
      else:
9762
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9763
                                     self.mode)
9764

    
9765
      # If not specified all disks should be replaced
9766
      if not self.disks:
9767
        self.disks = range(len(self.instance.disks))
9768

    
9769
    for node in check_nodes:
9770
      _CheckNodeOnline(self.lu, node)
9771

    
9772
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9773
                                                          self.other_node,
9774
                                                          self.target_node]
9775
                              if node_name is not None)
9776

    
9777
    # Release unneeded node locks
9778
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9779

    
9780
    # Release any owned node group
9781
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9782
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9783

    
9784
    # Check whether disks are valid
9785
    for disk_idx in self.disks:
9786
      instance.FindDisk(disk_idx)
9787

    
9788
    # Get secondary node IP addresses
9789
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9790
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9791

    
9792
  def Exec(self, feedback_fn):
9793
    """Execute disk replacement.
9794

9795
    This dispatches the disk replacement to the appropriate handler.
9796

9797
    """
9798
    if self.delay_iallocator:
9799
      self._CheckPrereq2()
9800

    
9801
    if __debug__:
9802
      # Verify owned locks before starting operation
9803
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9804
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9805
          ("Incorrect node locks, owning %s, expected %s" %
9806
           (owned_nodes, self.node_secondary_ip.keys()))
9807

    
9808
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9809
      assert list(owned_instances) == [self.instance_name], \
9810
          "Instance '%s' not locked" % self.instance_name
9811

    
9812
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9813
          "Should not own any node group lock at this point"
9814

    
9815
    if not self.disks:
9816
      feedback_fn("No disks need replacement")
9817
      return
9818

    
9819
    feedback_fn("Replacing disk(s) %s for %s" %
9820
                (utils.CommaJoin(self.disks), self.instance.name))
9821

    
9822
    activate_disks = (not self.instance.admin_up)
9823

    
9824
    # Activate the instance disks if we're replacing them on a down instance
9825
    if activate_disks:
9826
      _StartInstanceDisks(self.lu, self.instance, True)
9827

    
9828
    try:
9829
      # Should we replace the secondary node?
9830
      if self.new_node is not None:
9831
        fn = self._ExecDrbd8Secondary
9832
      else:
9833
        fn = self._ExecDrbd8DiskOnly
9834

    
9835
      result = fn(feedback_fn)
9836
    finally:
9837
      # Deactivate the instance disks if we're replacing them on a
9838
      # down instance
9839
      if activate_disks:
9840
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9841

    
9842
    if __debug__:
9843
      # Verify owned locks
9844
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9845
      nodes = frozenset(self.node_secondary_ip)
9846
      assert ((self.early_release and not owned_nodes) or
9847
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9848
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9849
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9850

    
9851
    return result
9852

    
9853
  def _CheckVolumeGroup(self, nodes):
9854
    self.lu.LogInfo("Checking volume groups")
9855

    
9856
    vgname = self.cfg.GetVGName()
9857

    
9858
    # Make sure volume group exists on all involved nodes
9859
    results = self.rpc.call_vg_list(nodes)
9860
    if not results:
9861
      raise errors.OpExecError("Can't list volume groups on the nodes")
9862

    
9863
    for node in nodes:
9864
      res = results[node]
9865
      res.Raise("Error checking node %s" % node)
9866
      if vgname not in res.payload:
9867
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9868
                                 (vgname, node))
9869

    
9870
  def _CheckDisksExistence(self, nodes):
9871
    # Check disk existence
9872
    for idx, dev in enumerate(self.instance.disks):
9873
      if idx not in self.disks:
9874
        continue
9875

    
9876
      for node in nodes:
9877
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9878
        self.cfg.SetDiskID(dev, node)
9879

    
9880
        result = self.rpc.call_blockdev_find(node, dev)
9881

    
9882
        msg = result.fail_msg
9883
        if msg or not result.payload:
9884
          if not msg:
9885
            msg = "disk not found"
9886
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9887
                                   (idx, node, msg))
9888

    
9889
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9890
    for idx, dev in enumerate(self.instance.disks):
9891
      if idx not in self.disks:
9892
        continue
9893

    
9894
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9895
                      (idx, node_name))
9896

    
9897
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9898
                                   ldisk=ldisk):
9899
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9900
                                 " replace disks for instance %s" %
9901
                                 (node_name, self.instance.name))
9902

    
9903
  def _CreateNewStorage(self, node_name):
9904
    """Create new storage on the primary or secondary node.
9905

9906
    This is only used for same-node replaces, not for changing the
    secondary node, hence the existing disk objects are left unmodified.
9908

9909
    """
9910
    iv_names = {}
9911

    
9912
    for idx, dev in enumerate(self.instance.disks):
9913
      if idx not in self.disks:
9914
        continue
9915

    
9916
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9917

    
9918
      self.cfg.SetDiskID(dev, node_name)
9919

    
9920
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9921
      names = _GenerateUniqueNames(self.lu, lv_names)
9922

    
9923
      vg_data = dev.children[0].logical_id[0]
9924
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9925
                             logical_id=(vg_data, names[0]))
9926
      vg_meta = dev.children[1].logical_id[0]
9927
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
9928
                             logical_id=(vg_meta, names[1]))
9929

    
9930
      new_lvs = [lv_data, lv_meta]
9931
      old_lvs = [child.Copy() for child in dev.children]
9932
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9933

    
9934
      # we pass force_create=True to force the LVM creation
9935
      for new_lv in new_lvs:
9936
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9937
                        _GetInstanceInfoText(self.instance), False)
9938

    
9939
    return iv_names
9940

    
9941
  def _CheckDevices(self, node_name, iv_names):
9942
    for name, (dev, _, _) in iv_names.iteritems():
9943
      self.cfg.SetDiskID(dev, node_name)
9944

    
9945
      result = self.rpc.call_blockdev_find(node_name, dev)
9946

    
9947
      msg = result.fail_msg
9948
      if msg or not result.payload:
9949
        if not msg:
9950
          msg = "disk not found"
9951
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9952
                                 (name, msg))
9953

    
9954
      if result.payload.is_degraded:
9955
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9956

    
9957
  def _RemoveOldStorage(self, node_name, iv_names):
9958
    for name, (_, old_lvs, _) in iv_names.iteritems():
9959
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9960

    
9961
      for lv in old_lvs:
9962
        self.cfg.SetDiskID(lv, node_name)
9963

    
9964
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9965
        if msg:
9966
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9967
                             hint="remove unused LVs manually")
9968

    
9969
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9970
    """Replace a disk on the primary or secondary for DRBD 8.
9971

9972
    The algorithm for replace is quite complicated:
9973

9974
      1. for each disk to be replaced:
9975

9976
        1. create new LVs on the target node with unique names
9977
        1. detach old LVs from the drbd device
9978
        1. rename old LVs to <name>_replaced-<time_t>
9979
        1. rename new LVs to old LVs
9980
        1. attach the new LVs (with the old names now) to the drbd device
9981

9982
      1. wait for sync across all devices
9983

9984
      1. for each modified disk:
9985

9986
        1. remove old LVs (which now have the name <name>_replaced-<time_t>)
9987

9988
    Failures are not very well handled.
9989

9990
    """
9991
    steps_total = 6
9992

    
9993
    # Step: check device activation
9994
    self.lu.LogStep(1, steps_total, "Check device existence")
9995
    self._CheckDisksExistence([self.other_node, self.target_node])
9996
    self._CheckVolumeGroup([self.target_node, self.other_node])
9997

    
9998
    # Step: check other node consistency
9999
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10000
    self._CheckDisksConsistency(self.other_node,
10001
                                self.other_node == self.instance.primary_node,
10002
                                False)
10003

    
10004
    # Step: create new storage
10005
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10006
    iv_names = self._CreateNewStorage(self.target_node)
10007

    
10008
    # Step: for each lv, detach+rename*2+attach
10009
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10010
    for dev, old_lvs, new_lvs in iv_names.itervalues():
10011
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10012

    
10013
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10014
                                                     old_lvs)
10015
      result.Raise("Can't detach drbd from local storage on node"
10016
                   " %s for device %s" % (self.target_node, dev.iv_name))
10017
      #dev.children = []
10018
      #cfg.Update(instance)
10019

    
10020
      # ok, we created the new LVs, so now we know we have the needed
10021
      # storage; as such, we proceed on the target node to rename
10022
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10023
      # using the assumption that logical_id == physical_id (which in
10024
      # turn is the unique_id on that node)
10025

    
10026
      # FIXME(iustin): use a better name for the replaced LVs
10027
      temp_suffix = int(time.time())
10028
      ren_fn = lambda d, suff: (d.physical_id[0],
10029
                                d.physical_id[1] + "_replaced-%s" % suff)
10030

    
10031
      # Build the rename list based on what LVs exist on the node
10032
      rename_old_to_new = []
10033
      for to_ren in old_lvs:
10034
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10035
        if not result.fail_msg and result.payload:
10036
          # device exists
10037
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10038

    
10039
      self.lu.LogInfo("Renaming the old LVs on the target node")
10040
      result = self.rpc.call_blockdev_rename(self.target_node,
10041
                                             rename_old_to_new)
10042
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
10043

    
10044
      # Now we rename the new LVs to the old LVs
10045
      self.lu.LogInfo("Renaming the new LVs on the target node")
10046
      rename_new_to_old = [(new, old.physical_id)
10047
                           for old, new in zip(old_lvs, new_lvs)]
10048
      result = self.rpc.call_blockdev_rename(self.target_node,
10049
                                             rename_new_to_old)
10050
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
10051

    
10052
      # Intermediate steps of in memory modifications
10053
      for old, new in zip(old_lvs, new_lvs):
10054
        new.logical_id = old.logical_id
10055
        self.cfg.SetDiskID(new, self.target_node)
10056

    
10057
      # We need to modify old_lvs so that removal later removes the
10058
      # right LVs, not the newly added ones; note that old_lvs is a
10059
      # copy here
10060
      for disk in old_lvs:
10061
        disk.logical_id = ren_fn(disk, temp_suffix)
10062
        self.cfg.SetDiskID(disk, self.target_node)
10063

    
10064
      # Now that the new lvs have the old name, we can add them to the device
10065
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10066
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10067
                                                  new_lvs)
10068
      msg = result.fail_msg
10069
      if msg:
10070
        for new_lv in new_lvs:
10071
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
10072
                                               new_lv).fail_msg
10073
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
10077
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10078

    
10079
    cstep = 5
10080
    if self.early_release:
10081
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10082
      cstep += 1
10083
      self._RemoveOldStorage(self.target_node, iv_names)
10084
      # WARNING: we release both node locks here, do not do other RPCs
10085
      # than WaitForSync to the primary node
10086
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10087
                    names=[self.target_node, self.other_node])
10088

    
10089
    # Wait for sync
10090
    # This can fail as the old devices are degraded and _WaitForSync
10091
    # does a combined result over all disks, so we don't check its return value
10092
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10093
    cstep += 1
10094
    _WaitForSync(self.lu, self.instance)
10095

    
10096
    # Check all devices manually
10097
    self._CheckDevices(self.instance.primary_node, iv_names)
10098

    
10099
    # Step: remove old storage
10100
    if not self.early_release:
10101
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10102
      cstep += 1
10103
      self._RemoveOldStorage(self.target_node, iv_names)
10104

    
10105
  def _ExecDrbd8Secondary(self, feedback_fn):
10106
    """Replace the secondary node for DRBD 8.
10107

10108
    The algorithm for replace is quite complicated:
10109
      - for all disks of the instance:
10110
        - create new LVs on the new node with same names
10111
        - shutdown the drbd device on the old secondary
10112
        - disconnect the drbd network on the primary
10113
        - create the drbd device on the new secondary
10114
        - network attach the drbd on the primary, using an artifice:
10115
          the drbd code for Attach() will connect to the network if it
10116
          finds a device which is connected to the good local disks but
10117
          not network enabled
10118
      - wait for sync across all devices
10119
      - remove all disks from the old secondary
10120

10121
    Failures are not very well handled.
10122

10123
    """
10124
    steps_total = 6
10125

    
10126
    pnode = self.instance.primary_node
10127

    
10128
    # Step: check device activation
10129
    self.lu.LogStep(1, steps_total, "Check device existence")
10130
    self._CheckDisksExistence([self.instance.primary_node])
10131
    self._CheckVolumeGroup([self.instance.primary_node])
10132

    
10133
    # Step: check other node consistency
10134
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10135
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
10136

    
10137
    # Step: create new storage
10138
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10139
    for idx, dev in enumerate(self.instance.disks):
10140
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10141
                      (self.new_node, idx))
10142
      # we pass force_create=True to force LVM creation
10143
      for new_lv in dev.children:
10144
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10145
                        _GetInstanceInfoText(self.instance), False)
10146

    
10147
    # Step 4: drbd minors and drbd setup changes
10148
    # after this, we must manually remove the drbd minors on both the
10149
    # error and the success paths
10150
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10151
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for _ in self.instance.disks],
                                        self.instance.name)
10154
    logging.debug("Allocated minors %r", minors)
10155

    
10156
    iv_names = {}
10157
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10158
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10159
                      (self.new_node, idx))
10160
      # create new devices on new_node; note that we create two IDs:
10161
      # one without port, so the drbd will be activated without
10162
      # networking information on the new node at this stage, and one
10163
      # with network, for the latter activation in step 4
10164
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10165
      if self.instance.primary_node == o_node1:
10166
        p_minor = o_minor1
10167
      else:
10168
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10169
        p_minor = o_minor2
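      # Illustrative layout of a DRBD8 logical_id (values made up):
      #   ("node1", "node2", 11000, 0, 1, "secret")
      # i.e. the two nodes, the network port, both minors and the shared
      # secret; new_alone_id below uses None for the port so that the new
      # device first comes up without networking.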
10170

    
10171
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10172
                      p_minor, new_minor, o_secret)
10173
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10174
                    p_minor, new_minor, o_secret)
10175

    
10176
      iv_names[idx] = (dev, dev.children, new_net_id)
10177
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10178
                    new_net_id)
10179
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10180
                              logical_id=new_alone_id,
10181
                              children=dev.children,
10182
                              size=dev.size)
10183
      try:
10184
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10185
                              _GetInstanceInfoText(self.instance), False)
10186
      except errors.GenericError:
10187
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10188
        raise
10189

    
10190
    # We have new devices, shutdown the drbd on the old secondary
10191
    for idx, dev in enumerate(self.instance.disks):
10192
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10193
      self.cfg.SetDiskID(dev, self.target_node)
10194
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10195
      if msg:
10196
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10197
                           "node: %s" % (idx, msg),
10198
                           hint=("Please cleanup this device manually as"
10199
                                 " soon as possible"))
10200

    
10201
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10202
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10203
                                               self.instance.disks)[pnode]
10204

    
10205
    msg = result.fail_msg
10206
    if msg:
10207
      # detaches didn't succeed (unlikely)
10208
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10209
      raise errors.OpExecError("Can't detach the disks from the network on"
10210
                               " old node: %s" % (msg,))
10211

    
10212
    # if we managed to detach at least one, we update all the disks of
10213
    # the instance to point to the new secondary
10214
    self.lu.LogInfo("Updating instance configuration")
10215
    for dev, _, new_logical_id in iv_names.itervalues():
10216
      dev.logical_id = new_logical_id
10217
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10218

    
10219
    self.cfg.Update(self.instance, feedback_fn)
10220

    
10221
    # and now perform the drbd attach
10222
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10223
                    " (standalone => connected)")
10224
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10225
                                            self.new_node],
10226
                                           self.node_secondary_ip,
10227
                                           self.instance.disks,
10228
                                           self.instance.name,
10229
                                           False)
10230
    for to_node, to_result in result.items():
10231
      msg = to_result.fail_msg
10232
      if msg:
10233
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10234
                           to_node, msg,
10235
                           hint=("please do a gnt-instance info to see the"
10236
                                 " status of disks"))
10237
    cstep = 5
10238
    if self.early_release:
10239
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10240
      cstep += 1
10241
      self._RemoveOldStorage(self.target_node, iv_names)
10242
      # WARNING: we release all node locks here, do not do other RPCs
10243
      # than WaitForSync to the primary node
10244
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10245
                    names=[self.instance.primary_node,
10246
                           self.target_node,
10247
                           self.new_node])
10248

    
10249
    # Wait for sync
10250
    # This can fail as the old devices are degraded and _WaitForSync
10251
    # does a combined result over all disks, so we don't check its return value
10252
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10253
    cstep += 1
10254
    _WaitForSync(self.lu, self.instance)
10255

    
10256
    # Check all devices manually
10257
    self._CheckDevices(self.instance.primary_node, iv_names)
10258

    
10259
    # Step: remove old storage
10260
    if not self.early_release:
10261
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10262
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
10266
  """Repairs the volume group on a node.
10267

10268
  """
10269
  REQ_BGL = False
10270

    
10271
  def CheckArguments(self):
10272
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10273

    
10274
    storage_type = self.op.storage_type
10275

    
10276
    if (constants.SO_FIX_CONSISTENCY not in
10277
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10278
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10279
                                 " repaired" % storage_type,
10280
                                 errors.ECODE_INVAL)
10281

    
10282
  def ExpandNames(self):
10283
    self.needed_locks = {
10284
      locking.LEVEL_NODE: [self.op.node_name],
10285
      }
10286

    
10287
  def _CheckFaultyDisks(self, instance, node_name):
10288
    """Ensure faulty disks abort the opcode or at least warn."""
10289
    try:
10290
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10291
                                  node_name, True):
10292
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10293
                                   " node '%s'" % (instance.name, node_name),
10294
                                   errors.ECODE_STATE)
10295
    except errors.OpPrereqError, err:
10296
      if self.op.ignore_consistency:
10297
        self.proc.LogWarning(str(err.args[0]))
10298
      else:
10299
        raise
10300

    
10301
  def CheckPrereq(self):
10302
    """Check prerequisites.
10303

10304
    """
10305
    # Check whether any instance on this node has faulty disks
10306
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10307
      if not inst.admin_up:
10308
        continue
10309
      check_nodes = set(inst.all_nodes)
10310
      check_nodes.discard(self.op.node_name)
10311
      for inst_node_name in check_nodes:
10312
        self._CheckFaultyDisks(inst, inst_node_name)
10313

    
10314
  def Exec(self, feedback_fn):
10315
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10316
                (self.op.name, self.op.node_name))
10317

    
10318
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10319
    result = self.rpc.call_storage_execute(self.op.node_name,
10320
                                           self.op.storage_type, st_args,
10321
                                           self.op.name,
10322
                                           constants.SO_FIX_CONSISTENCY)
10323
    result.Raise("Failed to repair storage unit '%s' on %s" %
10324
                 (self.op.name, self.op.node_name))


class LUNodeEvacuate(NoHooksLU):
10328
  """Evacuates instances off a list of nodes.
10329

10330
  """
10331
  REQ_BGL = False
10332

    
10333
  def CheckArguments(self):
10334
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10335

    
10336
  def ExpandNames(self):
10337
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10338

    
10339
    if self.op.remote_node is not None:
10340
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10341
      assert self.op.remote_node
10342

    
10343
      if self.op.remote_node == self.op.node_name:
10344
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10345
                                   " secondary node", errors.ECODE_INVAL)
10346

    
10347
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10348
        raise errors.OpPrereqError("Without the use of an iallocator only"
10349
                                   " secondary instances can be evacuated",
10350
                                   errors.ECODE_INVAL)
10351

    
10352
    # Declare locks
10353
    self.share_locks = _ShareAll()
10354
    self.needed_locks = {
10355
      locking.LEVEL_INSTANCE: [],
10356
      locking.LEVEL_NODEGROUP: [],
10357
      locking.LEVEL_NODE: [],
10358
      }
10359

    
10360
    if self.op.remote_node is None:
10361
      # Iallocator will choose any node(s) in the same group
10362
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10363
    else:
10364
      group_nodes = frozenset([self.op.remote_node])
10365

    
10366
    # Determine nodes to be locked
10367
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10368

    
10369
  def _DetermineInstances(self):
10370
    """Builds list of instances to operate on.
10371

10372
    """
10373
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10374

    
10375
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10376
      # Primary instances only
10377
      inst_fn = _GetNodePrimaryInstances
10378
      assert self.op.remote_node is None, \
10379
        "Evacuating primary instances requires iallocator"
10380
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10381
      # Secondary instances only
10382
      inst_fn = _GetNodeSecondaryInstances
10383
    else:
10384
      # All instances
10385
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10386
      inst_fn = _GetNodeInstances
10387

    
10388
    return inst_fn(self.cfg, self.op.node_name)
10389

    
10390
  def DeclareLocks(self, level):
10391
    if level == locking.LEVEL_INSTANCE:
10392
      # Lock instances optimistically, needs verification once node and group
10393
      # locks have been acquired
10394
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10395
        set(i.name for i in self._DetermineInstances())
10396

    
10397
    elif level == locking.LEVEL_NODEGROUP:
10398
      # Lock node groups optimistically, needs verification once nodes have
10399
      # been acquired
10400
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10401
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10402

    
10403
    elif level == locking.LEVEL_NODE:
10404
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10405

    
10406
  def CheckPrereq(self):
10407
    # Verify locks
10408
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10409
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10410
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10411

    
10412
    assert owned_nodes == self.lock_nodes
10413

    
10414
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10415
    if owned_groups != wanted_groups:
10416
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10417
                               " current groups are '%s', used to be '%s'" %
10418
                               (utils.CommaJoin(wanted_groups),
10419
                                utils.CommaJoin(owned_groups)))
10420

    
10421
    # Determine affected instances
10422
    self.instances = self._DetermineInstances()
10423
    self.instance_names = [i.name for i in self.instances]
10424

    
10425
    if set(self.instance_names) != owned_instances:
10426
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10427
                               " were acquired, current instances are '%s',"
10428
                               " used to be '%s'" %
10429
                               (self.op.node_name,
10430
                                utils.CommaJoin(self.instance_names),
10431
                                utils.CommaJoin(owned_instances)))
10432

    
10433
    if self.instance_names:
10434
      self.LogInfo("Evacuating instances from node '%s': %s",
10435
                   self.op.node_name,
10436
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10437
    else:
10438
      self.LogInfo("No instances to evacuate from node '%s'",
10439
                   self.op.node_name)
10440

    
10441
    if self.op.remote_node is not None:
10442
      for i in self.instances:
10443
        if i.primary_node == self.op.remote_node:
10444
          raise errors.OpPrereqError("Node %s is the primary node of"
10445
                                     " instance %s, cannot use it as"
10446
                                     " secondary" %
10447
                                     (self.op.remote_node, i.name),
10448
                                     errors.ECODE_INVAL)
10449

    
10450
  def Exec(self, feedback_fn):
10451
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10452

    
10453
    if not self.instance_names:
10454
      # No instances to evacuate
10455
      jobs = []
10456

    
10457
    elif self.op.iallocator is not None:
10458
      # TODO: Implement relocation to other group
10459
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10460
                       evac_mode=self.op.mode,
10461
                       instances=list(self.instance_names))
10462

    
10463
      ial.Run(self.op.iallocator)
10464

    
10465
      if not ial.success:
10466
        raise errors.OpPrereqError("Can't compute node evacuation using"
10467
                                   " iallocator '%s': %s" %
10468
                                   (self.op.iallocator, ial.info),
10469
                                   errors.ECODE_NORES)
10470

    
10471
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10472

    
10473
    elif self.op.remote_node is not None:
10474
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10475
      jobs = [
10476
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10477
                                        remote_node=self.op.remote_node,
10478
                                        disks=[],
10479
                                        mode=constants.REPLACE_DISK_CHG,
10480
                                        early_release=self.op.early_release)]
10481
        for instance_name in self.instance_names
10482
        ]
10483

    
10484
    else:
10485
      raise errors.ProgrammerError("No iallocator or remote node")
10486

    
10487
    return ResultWithJobs(jobs)


def _SetOpEarlyRelease(early_release, op):
10491
  """Sets C{early_release} flag on opcodes if available.
10492

10493
  """
10494
  try:
10495
    op.early_release = early_release
10496
  except AttributeError:
10497
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10498

    
10499
  return op


def _NodeEvacDest(use_nodes, group, nodes):
10503
  """Returns group or nodes depending on caller's choice.
10504

10505
  """
10506
  if use_nodes:
10507
    return utils.CommaJoin(nodes)
10508
  else:
10509
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10513
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10514

10515
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10516
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10517

10518
  @type lu: L{LogicalUnit}
10519
  @param lu: Logical unit instance
10520
  @type alloc_result: tuple/list
10521
  @param alloc_result: Result from iallocator
10522
  @type early_release: bool
10523
  @param early_release: Whether to release locks early if possible
10524
  @type use_nodes: bool
10525
  @param use_nodes: Whether to display node names instead of groups
10526

10527
  """
10528
  (moved, failed, jobs) = alloc_result
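  # Shape of alloc_result, with purely illustrative values:
  #   moved:  [("inst1", "<target group>", ["node3", ...]), ...]
  #   failed: [("inst2", "<reason it could not be moved>"), ...]
  #   jobs:   [[<serialized opcode dict>, ...], ...] (one inner list per job)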
10529

    
10530
  if failed:
10531
    lu.LogWarning("Unable to evacuate instances %s",
10532
                  utils.CommaJoin("%s (%s)" % (name, reason)
10533
                                  for (name, reason) in failed))
10534

    
10535
  if moved:
10536
    lu.LogInfo("Instances to be moved: %s",
10537
               utils.CommaJoin("%s (to %s)" %
10538
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10539
                               for (name, group, nodes) in moved))
10540

    
10541
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10542
              map(opcodes.OpCode.LoadOpCode, ops))
10543
          for ops in jobs]


class LUInstanceGrowDisk(LogicalUnit):
10547
  """Grow a disk of an instance.
10548

10549
  """
10550
  HPATH = "disk-grow"
10551
  HTYPE = constants.HTYPE_INSTANCE
10552
  REQ_BGL = False
10553

    
10554
  def ExpandNames(self):
10555
    self._ExpandAndLockInstance()
10556
    self.needed_locks[locking.LEVEL_NODE] = []
10557
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10558

    
10559
  def DeclareLocks(self, level):
10560
    if level == locking.LEVEL_NODE:
10561
      self._LockInstancesNodes()
10562

    
10563
  def BuildHooksEnv(self):
10564
    """Build hooks env.
10565

10566
    This runs on the master, the primary and all the secondaries.
10567

10568
    """
10569
    env = {
10570
      "DISK": self.op.disk,
10571
      "AMOUNT": self.op.amount,
10572
      }
10573
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10574
    return env
10575

    
10576
  def BuildHooksNodes(self):
10577
    """Build hooks nodes.
10578

10579
    """
10580
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10581
    return (nl, nl)
10582

    
10583
  def CheckPrereq(self):
10584
    """Check prerequisites.
10585

10586
    This checks that the instance is in the cluster.
10587

10588
    """
10589
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10590
    assert instance is not None, \
10591
      "Cannot retrieve locked instance %s" % self.op.instance_name
10592
    nodenames = list(instance.all_nodes)
10593
    for node in nodenames:
10594
      _CheckNodeOnline(self, node)
10595

    
10596
    self.instance = instance
10597

    
10598
    if instance.disk_template not in constants.DTS_GROWABLE:
10599
      raise errors.OpPrereqError("Instance's disk layout does not support"
10600
                                 " growing", errors.ECODE_INVAL)
10601

    
10602
    self.disk = instance.FindDisk(self.op.disk)
10603

    
10604
    if instance.disk_template not in (constants.DT_FILE,
10605
                                      constants.DT_SHARED_FILE):
10606
      # TODO: check the free disk space for file, when that feature will be
10607
      # supported
10608
      _CheckNodesFreeDiskPerVG(self, nodenames,
10609
                               self.disk.ComputeGrowth(self.op.amount))
10610

    
10611
  def Exec(self, feedback_fn):
10612
    """Execute disk grow.
10613

10614
    """
10615
    instance = self.instance
10616
    disk = self.disk
10617

    
10618
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10619
    if not disks_ok:
10620
      raise errors.OpExecError("Cannot activate block device to grow")
10621

    
10622
    # First run all grow ops in dry-run mode
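    # (the trailing boolean passed to call_blockdev_grow is the dry-run
    # flag); only if every node accepts the grow do we repeat the calls
    # for real below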
10623
    for node in instance.all_nodes:
10624
      self.cfg.SetDiskID(disk, node)
10625
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10626
      result.Raise("Grow request failed to node %s" % node)
10627

    
10628
    # We know that (as far as we can test) operations across different
10629
    # nodes will succeed, time to run it for real
10630
    for node in instance.all_nodes:
10631
      self.cfg.SetDiskID(disk, node)
10632
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10633
      result.Raise("Grow request failed to node %s" % node)
10634

    
10635
      # TODO: Rewrite code to work properly
10636
      # DRBD goes into sync mode for a short amount of time after executing the
10637
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10638
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10639
      # time is a work-around.
10640
      time.sleep(5)
10641

    
10642
    disk.RecordGrow(self.op.amount)
10643
    self.cfg.Update(instance, feedback_fn)
10644
    if self.op.wait_for_sync:
10645
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10646
      if disk_abort:
10647
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10648
                             " status; please check the instance")
10649
      if not instance.admin_up:
10650
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10651
    elif not instance.admin_up:
10652
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10653
                           " not supposed to be running because no wait for"
10654
                           " sync mode was requested")
10655

    
10656

    
10657
class LUInstanceQueryData(NoHooksLU):
10658
  """Query runtime instance data.
10659

10660
  """
10661
  REQ_BGL = False
10662

    
10663
  def ExpandNames(self):
10664
    self.needed_locks = {}
10665

    
10666
    # Use locking if requested or when non-static information is wanted
10667
    if not (self.op.static or self.op.use_locking):
10668
      self.LogWarning("Non-static data requested, locks need to be acquired")
10669
      self.op.use_locking = True
10670

    
10671
    if self.op.instances or not self.op.use_locking:
10672
      # Expand instance names right here
10673
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10674
    else:
10675
      # Will use acquired locks
10676
      self.wanted_names = None
10677

    
10678
    if self.op.use_locking:
10679
      self.share_locks = _ShareAll()
10680

    
10681
      if self.wanted_names is None:
10682
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10683
      else:
10684
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10685

    
10686
      self.needed_locks[locking.LEVEL_NODE] = []
10687
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10688

    
10689
  def DeclareLocks(self, level):
10690
    if self.op.use_locking and level == locking.LEVEL_NODE:
10691
      self._LockInstancesNodes()
10692

    
10693
  def CheckPrereq(self):
10694
    """Check prerequisites.
10695

10696
    This only checks the optional instance list against the existing names.
10697

10698
    """
10699
    if self.wanted_names is None:
10700
      assert self.op.use_locking, "Locking was not used"
10701
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10702

    
10703
    self.wanted_instances = \
10704
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10705

    
10706
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10707
    """Returns the status of a block device
10708

10709
    """
10710
    if self.op.static or not node:
10711
      return None
10712

    
10713
    self.cfg.SetDiskID(dev, node)
10714

    
10715
    result = self.rpc.call_blockdev_find(node, dev)
10716
    if result.offline:
10717
      return None
10718

    
10719
    result.Raise("Can't compute disk status for %s" % instance_name)
10720

    
10721
    status = result.payload
10722
    if status is None:
10723
      return None
10724

    
10725
    return (status.dev_path, status.major, status.minor,
10726
            status.sync_percent, status.estimated_time,
10727
            status.is_degraded, status.ldisk_status)
10728

    
10729
  def _ComputeDiskStatus(self, instance, snode, dev):
10730
    """Compute block device status.
10731

10732
    """
10733
    if dev.dev_type in constants.LDS_DRBD:
10734
      # we change the snode then (otherwise we use the one passed in)
10735
      if dev.logical_id[0] == instance.primary_node:
10736
        snode = dev.logical_id[1]
10737
      else:
10738
        snode = dev.logical_id[0]
10739

    
10740
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10741
                                              instance.name, dev)
10742
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10743

    
10744
    if dev.children:
10745
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10746
                                        instance, snode),
10747
                         dev.children)
10748
    else:
10749
      dev_children = []
10750

    
10751
    return {
10752
      "iv_name": dev.iv_name,
10753
      "dev_type": dev.dev_type,
10754
      "logical_id": dev.logical_id,
10755
      "physical_id": dev.physical_id,
10756
      "pstatus": dev_pstatus,
10757
      "sstatus": dev_sstatus,
10758
      "children": dev_children,
10759
      "mode": dev.mode,
10760
      "size": dev.size,
10761
      }
10762

    
10763
  def Exec(self, feedback_fn):
10764
    """Gather and return data"""
10765
    result = {}
10766

    
10767
    cluster = self.cfg.GetClusterInfo()
10768

    
10769
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10770
                                          for i in self.wanted_instances)
10771
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10772
      if self.op.static or pnode.offline:
10773
        remote_state = None
10774
        if pnode.offline:
10775
          self.LogWarning("Primary node %s is marked offline, returning static"
10776
                          " information only for instance %s" %
10777
                          (pnode.name, instance.name))
10778
      else:
10779
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10780
                                                  instance.name,
10781
                                                  instance.hypervisor)
10782
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10783
        remote_info = remote_info.payload
10784
        if remote_info and "state" in remote_info:
10785
          remote_state = "up"
10786
        else:
10787
          remote_state = "down"
10788

    
10789
      if instance.admin_up:
10790
        config_state = "up"
10791
      else:
10792
        config_state = "down"
10793

    
10794
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10795
                  instance.disks)
10796

    
10797
      result[instance.name] = {
10798
        "name": instance.name,
10799
        "config_state": config_state,
10800
        "run_state": remote_state,
10801
        "pnode": instance.primary_node,
10802
        "snodes": instance.secondary_nodes,
10803
        "os": instance.os,
10804
        # this happens to be the same format used for hooks
10805
        "nics": _NICListToTuple(self, instance.nics),
10806
        "disk_template": instance.disk_template,
10807
        "disks": disks,
10808
        "hypervisor": instance.hypervisor,
10809
        "network_port": instance.network_port,
10810
        "hv_instance": instance.hvparams,
10811
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10812
        "be_instance": instance.beparams,
10813
        "be_actual": cluster.FillBE(instance),
10814
        "os_instance": instance.osparams,
10815
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10816
        "serial_no": instance.serial_no,
10817
        "mtime": instance.mtime,
10818
        "ctime": instance.ctime,
10819
        "uuid": instance.uuid,
10820
        }
10821

    
10822
    return result


class LUInstanceSetParams(LogicalUnit):
10826
  """Modifies an instances's parameters.
10827

10828
  """
10829
  HPATH = "instance-modify"
10830
  HTYPE = constants.HTYPE_INSTANCE
10831
  REQ_BGL = False
10832

    
10833
  def CheckArguments(self):
10834
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10835
            self.op.hvparams or self.op.beparams or self.op.os_name):
10836
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10837

    
10838
    if self.op.hvparams:
10839
      _CheckGlobalHvParams(self.op.hvparams)
10840

    
10841
    # Disk validation
10842
    disk_addremove = 0
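    # self.op.disks is a list of (operation, parameters) pairs; a purely
    # illustrative example:
    #   [(constants.DDM_ADD, {constants.IDISK_SIZE: 1024,
    #                         constants.IDISK_MODE: constants.DISK_RDWR})]
    # or [(0, {...})] to modify the existing disk with index 0.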
10843
    for disk_op, disk_dict in self.op.disks:
10844
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10845
      if disk_op == constants.DDM_REMOVE:
10846
        disk_addremove += 1
10847
        continue
10848
      elif disk_op == constants.DDM_ADD:
10849
        disk_addremove += 1
10850
      else:
10851
        if not isinstance(disk_op, int):
10852
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10853
        if not isinstance(disk_dict, dict):
10854
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10855
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10856

    
10857
      if disk_op == constants.DDM_ADD:
10858
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10859
        if mode not in constants.DISK_ACCESS_SET:
10860
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10861
                                     errors.ECODE_INVAL)
10862
        size = disk_dict.get(constants.IDISK_SIZE, None)
10863
        if size is None:
10864
          raise errors.OpPrereqError("Required disk parameter size missing",
10865
                                     errors.ECODE_INVAL)
10866
        try:
10867
          size = int(size)
10868
        except (TypeError, ValueError), err:
10869
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10870
                                     str(err), errors.ECODE_INVAL)
10871
        disk_dict[constants.IDISK_SIZE] = size
10872
      else:
10873
        # modification of disk
10874
        if constants.IDISK_SIZE in disk_dict:
10875
          raise errors.OpPrereqError("Disk size change not possible, use"
10876
                                     " grow-disk", errors.ECODE_INVAL)
10877

    
10878
    if disk_addremove > 1:
10879
      raise errors.OpPrereqError("Only one disk add or remove operation"
10880
                                 " supported at a time", errors.ECODE_INVAL)
10881

    
10882
    if self.op.disks and self.op.disk_template is not None:
10883
      raise errors.OpPrereqError("Disk template conversion and other disk"
10884
                                 " changes not supported at the same time",
10885
                                 errors.ECODE_INVAL)
10886

    
10887
    if (self.op.disk_template and
10888
        self.op.disk_template in constants.DTS_INT_MIRROR and
10889
        self.op.remote_node is None):
10890
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10891
                                 " one requires specifying a secondary node",
10892
                                 errors.ECODE_INVAL)
10893

    
10894
    # NIC validation
10895
    nic_addremove = 0
10896
    for nic_op, nic_dict in self.op.nics:
10897
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10898
      if nic_op == constants.DDM_REMOVE:
10899
        nic_addremove += 1
10900
        continue
10901
      elif nic_op == constants.DDM_ADD:
10902
        nic_addremove += 1
10903
      else:
10904
        if not isinstance(nic_op, int):
10905
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10906
        if not isinstance(nic_dict, dict):
10907
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10908
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10909

    
10910
      # nic_dict should be a dict
10911
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10912
      if nic_ip is not None:
10913
        if nic_ip.lower() == constants.VALUE_NONE:
10914
          nic_dict[constants.INIC_IP] = None
10915
        else:
10916
          if not netutils.IPAddress.IsValid(nic_ip):
10917
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10918
                                       errors.ECODE_INVAL)
10919

    
10920
      nic_bridge = nic_dict.get("bridge", None)
10921
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10922
      if nic_bridge and nic_link:
10923
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10924
                                   " at the same time", errors.ECODE_INVAL)
10925
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10926
        nic_dict["bridge"] = None
10927
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10928
        nic_dict[constants.INIC_LINK] = None
10929

    
10930
      if nic_op == constants.DDM_ADD:
10931
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10932
        if nic_mac is None:
10933
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10934

    
10935
      if constants.INIC_MAC in nic_dict:
10936
        nic_mac = nic_dict[constants.INIC_MAC]
10937
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10938
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10939

    
10940
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10941
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10942
                                     " modifying an existing nic",
10943
                                     errors.ECODE_INVAL)
10944

    
10945
    if nic_addremove > 1:
10946
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10947
                                 " supported at a time", errors.ECODE_INVAL)
10948

    
10949
  def ExpandNames(self):
10950
    self._ExpandAndLockInstance()
10951
    self.needed_locks[locking.LEVEL_NODE] = []
10952
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10953

    
10954
  def DeclareLocks(self, level):
10955
    if level == locking.LEVEL_NODE:
10956
      self._LockInstancesNodes()
10957
      if self.op.disk_template and self.op.remote_node:
10958
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10959
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10960

    
10961
  def BuildHooksEnv(self):
10962
    """Build hooks env.
10963

10964
    This runs on the master, primary and secondaries.
10965

10966
    """
10967
    args = dict()
10968
    if constants.BE_MEMORY in self.be_new:
10969
      args["memory"] = self.be_new[constants.BE_MEMORY]
10970
    if constants.BE_VCPUS in self.be_new:
10971
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10972
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10973
    # information at all.
10974
    if self.op.nics:
10975
      args["nics"] = []
10976
      nic_override = dict(self.op.nics)
10977
      for idx, nic in enumerate(self.instance.nics):
10978
        if idx in nic_override:
10979
          this_nic_override = nic_override[idx]
10980
        else:
10981
          this_nic_override = {}
10982
        if constants.INIC_IP in this_nic_override:
10983
          ip = this_nic_override[constants.INIC_IP]
10984
        else:
10985
          ip = nic.ip
10986
        if constants.INIC_MAC in this_nic_override:
10987
          mac = this_nic_override[constants.INIC_MAC]
10988
        else:
10989
          mac = nic.mac
10990
        if idx in self.nic_pnew:
10991
          nicparams = self.nic_pnew[idx]
10992
        else:
10993
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10994
        mode = nicparams[constants.NIC_MODE]
10995
        link = nicparams[constants.NIC_LINK]
10996
        args["nics"].append((ip, mac, mode, link))
10997
      if constants.DDM_ADD in nic_override:
10998
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10999
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11000
        nicparams = self.nic_pnew[constants.DDM_ADD]
11001
        mode = nicparams[constants.NIC_MODE]
11002
        link = nicparams[constants.NIC_LINK]
11003
        args["nics"].append((ip, mac, mode, link))
11004
      elif constants.DDM_REMOVE in nic_override:
11005
        del args["nics"][-1]
11006

    
11007
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11008
    if self.op.disk_template:
11009
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11010

    
11011
    return env
11012

    
11013
  def BuildHooksNodes(self):
11014
    """Build hooks nodes.
11015

11016
    """
11017
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11018
    return (nl, nl)
11019

    
11020
  def CheckPrereq(self):
11021
    """Check prerequisites.
11022

11023
    This checks the validity of the requested parameter changes against the
    current instance and cluster configuration.
11024

11025
    """
11026
    # checking the new params on the primary/secondary nodes
11027

    
11028
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11029
    cluster = self.cluster = self.cfg.GetClusterInfo()
11030
    assert self.instance is not None, \
11031
      "Cannot retrieve locked instance %s" % self.op.instance_name
11032
    pnode = instance.primary_node
11033
    nodelist = list(instance.all_nodes)
11034

    
11035
    # OS change
11036
    if self.op.os_name and not self.op.force:
11037
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11038
                      self.op.force_variant)
11039
      instance_os = self.op.os_name
11040
    else:
11041
      instance_os = instance.os
11042

    
11043
    if self.op.disk_template:
11044
      if instance.disk_template == self.op.disk_template:
11045
        raise errors.OpPrereqError("Instance already has disk template %s" %
11046
                                   instance.disk_template, errors.ECODE_INVAL)
11047

    
11048
      if (instance.disk_template,
11049
          self.op.disk_template) not in self._DISK_CONVERSIONS:
11050
        raise errors.OpPrereqError("Unsupported disk template conversion from"
11051
                                   " %s to %s" % (instance.disk_template,
11052
                                                  self.op.disk_template),
11053
                                   errors.ECODE_INVAL)
11054
      _CheckInstanceDown(self, instance, "cannot change disk template")
11055
      if self.op.disk_template in constants.DTS_INT_MIRROR:
11056
        if self.op.remote_node == pnode:
11057
          raise errors.OpPrereqError("Given new secondary node %s is the same"
11058
                                     " as the primary node of the instance" %
11059
                                     self.op.remote_node, errors.ECODE_STATE)
11060
        _CheckNodeOnline(self, self.op.remote_node)
11061
        _CheckNodeNotDrained(self, self.op.remote_node)
11062
        # FIXME: here we assume that the old disk template is DT_PLAIN
11063
        assert instance.disk_template == constants.DT_PLAIN
11064
        disks = [{constants.IDISK_SIZE: d.size,
11065
                  constants.IDISK_VG: d.logical_id[0]}
11066
                 for d in instance.disks]
11067
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11068
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11069

    
11070
    # hvparams processing
11071
    if self.op.hvparams:
11072
      hv_type = instance.hypervisor
11073
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11074
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11075
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11076

    
11077
      # local check
11078
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11079
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11080
      self.hv_proposed = self.hv_new = hv_new # the new actual values
11081
      self.hv_inst = i_hvdict # the new dict (without defaults)
11082
    else:
11083
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11084
                                              instance.hvparams)
11085
      self.hv_new = self.hv_inst = {}
11086

    
11087
    # beparams processing
11088
    if self.op.beparams:
11089
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11090
                                   use_none=True)
11091
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11092
      be_new = cluster.SimpleFillBE(i_bedict)
11093
      self.be_proposed = self.be_new = be_new # the new actual values
11094
      self.be_inst = i_bedict # the new dict (without defaults)
11095
    else:
11096
      self.be_new = self.be_inst = {}
11097
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11098
    be_old = cluster.FillBE(instance)
11099

    
11100
    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
11103
    if (constants.BE_VCPUS in self.be_proposed and
11104
        constants.HV_CPU_MASK in self.hv_proposed):
11105
      cpu_list = \
11106
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11107
      # Verify the mask is consistent with the number of vCPUs. This
      # test can be skipped if the mask has only one entry, which means
      # the same mask is applied to all vCPUs.
11110
      if (len(cpu_list) > 1 and
11111
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11112
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11113
                                   " CPU mask [%s]" %
11114
                                   (self.be_proposed[constants.BE_VCPUS],
11115
                                    self.hv_proposed[constants.HV_CPU_MASK]),
11116
                                   errors.ECODE_INVAL)
11117

    
11118
      # Only perform this test if a new CPU mask is given
11119
      if constants.HV_CPU_MASK in self.hv_new:
11120
        # Calculate the largest CPU number requested
11121
        max_requested_cpu = max(map(max, cpu_list))
11122
        # Check that all of the instance's nodes have enough physical CPUs to
11123
        # satisfy the requested CPU mask
11124
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11125
                                max_requested_cpu + 1, instance.hypervisor)
11126

    
11127
    # osparams processing
11128
    if self.op.osparams:
11129
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11130
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11131
      self.os_inst = i_osdict # the new dict (without defaults)
11132
    else:
11133
      self.os_inst = {}
11134

    
11135
    self.warn = []
11136

    
11137
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11138
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11139
      mem_check_list = [pnode]
11140
      if be_new[constants.BE_AUTO_BALANCE]:
11141
        # either we changed auto_balance to yes or it was already enabled
11142
        mem_check_list.extend(instance.secondary_nodes)
11143
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11144
                                                  instance.hypervisor)
11145
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11146
                                         instance.hypervisor)
11147
      pninfo = nodeinfo[pnode]
11148
      msg = pninfo.fail_msg
11149
      if msg:
11150
        # Assume the primary node is unreachable and go ahead
11151
        self.warn.append("Can't get info from primary node %s: %s" %
11152
                         (pnode, msg))
11153
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
11154
        self.warn.append("Node data from primary node %s doesn't contain"
11155
                         " free memory information" % pnode)
11156
      elif instance_info.fail_msg:
11157
        self.warn.append("Can't get instance runtime information: %s" %
11158
                        instance_info.fail_msg)
11159
      else:
11160
        if instance_info.payload:
11161
          current_mem = int(instance_info.payload["memory"])
11162
        else:
11163
          # Assume instance not running
11164
          # (there is a slight race condition here, but it's not very probable,
11165
          # and we have no other way to check)
11166
          current_mem = 0
11167
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11168
                    pninfo.payload["memory_free"])
11169
        if miss_mem > 0:
11170
          raise errors.OpPrereqError("This change will prevent the instance"
11171
                                     " from starting, due to %d MB of memory"
11172
                                     " missing on its primary node" % miss_mem,
11173
                                     errors.ECODE_NORES)
11174

    
11175
      if be_new[constants.BE_AUTO_BALANCE]:
11176
        for node, nres in nodeinfo.items():
11177
          if node not in instance.secondary_nodes:
11178
            continue
11179
          nres.Raise("Can't get info from secondary node %s" % node,
11180
                     prereq=True, ecode=errors.ECODE_STATE)
11181
          if not isinstance(nres.payload.get("memory_free", None), int):
11182
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11183
                                       " memory information" % node,
11184
                                       errors.ECODE_STATE)
11185
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11186
            raise errors.OpPrereqError("This change will prevent the instance"
11187
                                       " from failover to its secondary node"
11188
                                       " %s, due to not enough memory" % node,
11189
                                       errors.ECODE_STATE)
11190

    
11191
    # NIC processing
11192
    self.nic_pnew = {}
11193
    self.nic_pinst = {}
11194
    for nic_op, nic_dict in self.op.nics:
11195
      if nic_op == constants.DDM_REMOVE:
11196
        if not instance.nics:
11197
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11198
                                     errors.ECODE_INVAL)
11199
        continue
11200
      if nic_op != constants.DDM_ADD:
11201
        # an existing nic
11202
        if not instance.nics:
11203
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11204
                                     " no NICs" % nic_op,
11205
                                     errors.ECODE_INVAL)
11206
        if nic_op < 0 or nic_op >= len(instance.nics):
11207
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11208
                                     " are 0 to %d" %
11209
                                     (nic_op, len(instance.nics) - 1),
11210
                                     errors.ECODE_INVAL)
11211
        old_nic_params = instance.nics[nic_op].nicparams
11212
        old_nic_ip = instance.nics[nic_op].ip
11213
      else:
11214
        old_nic_params = {}
11215
        old_nic_ip = None
11216

    
11217
      update_params_dict = dict([(key, nic_dict[key])
11218
                                 for key in constants.NICS_PARAMETERS
11219
                                 if key in nic_dict])
11220

    
11221
      if "bridge" in nic_dict:
11222
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11223

    
11224
      new_nic_params = _GetUpdatedParams(old_nic_params,
11225
                                         update_params_dict)
11226
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11227
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11228
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11229
      self.nic_pinst[nic_op] = new_nic_params
11230
      self.nic_pnew[nic_op] = new_filled_nic_params
11231
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11232

    
11233
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11234
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11235
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11236
        if msg:
11237
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11238
          if self.op.force:
11239
            self.warn.append(msg)
11240
          else:
11241
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11242
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11243
        if constants.INIC_IP in nic_dict:
11244
          nic_ip = nic_dict[constants.INIC_IP]
11245
        else:
11246
          nic_ip = old_nic_ip
11247
        if nic_ip is None:
11248
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11249
                                     " on a routed nic", errors.ECODE_INVAL)
11250
      if constants.INIC_MAC in nic_dict:
11251
        nic_mac = nic_dict[constants.INIC_MAC]
11252
        if nic_mac is None:
11253
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11254
                                     errors.ECODE_INVAL)
11255
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11256
          # otherwise generate the mac
11257
          nic_dict[constants.INIC_MAC] = \
11258
            self.cfg.GenerateMAC(self.proc.GetECId())
11259
        else:
11260
          # or validate/reserve the current one
11261
          try:
11262
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11263
          except errors.ReservationError:
11264
            raise errors.OpPrereqError("MAC address %s already in use"
11265
                                       " in cluster" % nic_mac,
11266
                                       errors.ECODE_NOTUNIQUE)
11267

    
11268
    # DISK processing
11269
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11270
      raise errors.OpPrereqError("Disk operations not supported for"
11271
                                 " diskless instances",
11272
                                 errors.ECODE_INVAL)
11273
    for disk_op, _ in self.op.disks:
11274
      if disk_op == constants.DDM_REMOVE:
11275
        if len(instance.disks) == 1:
11276
          raise errors.OpPrereqError("Cannot remove the last disk of"
11277
                                     " an instance", errors.ECODE_INVAL)
11278
        _CheckInstanceDown(self, instance, "cannot remove disks")
11279

    
11280
      if (disk_op == constants.DDM_ADD and
11281
          len(instance.disks) >= constants.MAX_DISKS):
11282
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11283
                                   " add more" % constants.MAX_DISKS,
11284
                                   errors.ECODE_STATE)
11285
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11286
        # an existing disk
11287
        if disk_op < 0 or disk_op >= len(instance.disks):
11288
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11289
                                     " are 0 to %d" %
11290
                                     (disk_op, len(instance.disks) - 1),
11291
                                     errors.ECODE_INVAL)
11292

    
11293
    return
11294

    
11295
  def _ConvertPlainToDrbd(self, feedback_fn):
11296
    """Converts an instance from plain to drbd.
11297

11298
    """
11299
    feedback_fn("Converting template to drbd")
11300
    instance = self.instance
11301
    pnode = instance.primary_node
11302
    snode = self.op.remote_node
11303

    
11304
    # create a fake disk info for _GenerateDiskTemplate
11305
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11306
                  constants.IDISK_VG: d.logical_id[0]}
11307
                 for d in instance.disks]
11308
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11309
                                      instance.name, pnode, [snode],
11310
                                      disk_info, None, None, 0, feedback_fn)
11311
    info = _GetInstanceInfoText(instance)
11312
    feedback_fn("Creating aditional volumes...")
11313
    # first, create the missing data and meta devices
11314
    for disk in new_disks:
11315
      # unfortunately this is... not too nice
11316
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11317
                            info, True)
11318
      for child in disk.children:
11319
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11320
    # at this stage, all new LVs have been created, we can rename the
11321
    # old ones
11322
    feedback_fn("Renaming original volumes...")
11323
    rename_list = [(o, n.children[0].logical_id)
11324
                   for (o, n) in zip(instance.disks, new_disks)]
11325
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11326
    result.Raise("Failed to rename original LVs")
11327

    
11328
    feedback_fn("Initializing DRBD devices...")
11329
    # all child devices are in place, we can now create the DRBD devices
11330
    for disk in new_disks:
11331
      for node in [pnode, snode]:
11332
        f_create = node == pnode
11333
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11334

    
11335
    # at this point, the instance has been modified
11336
    instance.disk_template = constants.DT_DRBD8
11337
    instance.disks = new_disks
11338
    self.cfg.Update(instance, feedback_fn)
11339

    
11340
    # disks are created, waiting for sync
11341
    disk_abort = not _WaitForSync(self, instance,
11342
                                  oneshot=not self.op.wait_for_sync)
11343
    if disk_abort:
11344
      raise errors.OpExecError("There are some degraded disks for"
11345
                               " this instance, please cleanup manually")
11346

    
11347
  def _ConvertDrbdToPlain(self, feedback_fn):
11348
    """Converts an instance from drbd to plain.
11349

11350
    """
11351
    instance = self.instance
11352
    assert len(instance.secondary_nodes) == 1
11353
    pnode = instance.primary_node
11354
    snode = instance.secondary_nodes[0]
11355
    feedback_fn("Converting template to plain")
11356

    
11357
    old_disks = instance.disks
11358
    new_disks = [d.children[0] for d in old_disks]
11359

    
11360
    # copy over size and mode
11361
    for parent, child in zip(old_disks, new_disks):
11362
      child.size = parent.size
11363
      child.mode = parent.mode
11364

    
11365
    # update instance structure
11366
    instance.disks = new_disks
11367
    instance.disk_template = constants.DT_PLAIN
11368
    self.cfg.Update(instance, feedback_fn)
11369

    
11370
    feedback_fn("Removing volumes on the secondary node...")
11371
    for disk in old_disks:
11372
      self.cfg.SetDiskID(disk, snode)
11373
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11374
      if msg:
11375
        self.LogWarning("Could not remove block device %s on node %s,"
11376
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11377

    
11378
    feedback_fn("Removing unneeded volumes on the primary node...")
11379
    for idx, disk in enumerate(old_disks):
11380
      meta = disk.children[1]
11381
      self.cfg.SetDiskID(meta, pnode)
11382
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11383
      if msg:
11384
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11385
                        " continuing anyway: %s", idx, pnode, msg)
11386

    
11387
  def Exec(self, feedback_fn):
11388
    """Modifies an instance.
11389

11390
    All parameters take effect only at the next restart of the instance.
11391

11392
    """
11393
    # Process here the warnings from CheckPrereq, as we don't have a
11394
    # feedback_fn there.
11395
    for warn in self.warn:
11396
      feedback_fn("WARNING: %s" % warn)
11397

    
11398
    result = []
11399
    instance = self.instance
11400
    # disk changes
11401
    for disk_op, disk_dict in self.op.disks:
11402
      if disk_op == constants.DDM_REMOVE:
11403
        # remove the last disk
11404
        device = instance.disks.pop()
11405
        device_idx = len(instance.disks)
11406
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11407
          self.cfg.SetDiskID(disk, node)
11408
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11409
          if msg:
11410
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11411
                            " continuing anyway", device_idx, node, msg)
11412
        result.append(("disk/%d" % device_idx, "remove"))
11413
      elif disk_op == constants.DDM_ADD:
11414
        # add a new disk
11415
        if instance.disk_template in (constants.DT_FILE,
11416
                                        constants.DT_SHARED_FILE):
11417
          file_driver, file_path = instance.disks[0].logical_id
11418
          file_path = os.path.dirname(file_path)
11419
        else:
11420
          file_driver = file_path = None
11421
        disk_idx_base = len(instance.disks)
11422
        new_disk = _GenerateDiskTemplate(self,
11423
                                         instance.disk_template,
11424
                                         instance.name, instance.primary_node,
11425
                                         instance.secondary_nodes,
11426
                                         [disk_dict],
11427
                                         file_path,
11428
                                         file_driver,
11429
                                         disk_idx_base, feedback_fn)[0]
11430
        instance.disks.append(new_disk)
11431
        info = _GetInstanceInfoText(instance)
11432

    
11433
        logging.info("Creating volume %s for instance %s",
11434
                     new_disk.iv_name, instance.name)
11435
        # Note: this needs to be kept in sync with _CreateDisks
11436
        #HARDCODE
11437
        for node in instance.all_nodes:
11438
          f_create = node == instance.primary_node
11439
          try:
11440
            _CreateBlockDev(self, node, instance, new_disk,
11441
                            f_create, info, f_create)
11442
          except errors.OpExecError, err:
11443
            self.LogWarning("Failed to create volume %s (%s) on"
11444
                            " node %s: %s",
11445
                            new_disk.iv_name, new_disk, node, err)
11446
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11447
                       (new_disk.size, new_disk.mode)))
11448
      else:
11449
        # change a given disk
11450
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11451
        result.append(("disk.mode/%d" % disk_op,
11452
                       disk_dict[constants.IDISK_MODE]))
11453

    
11454
    if self.op.disk_template:
11455
      r_shut = _ShutdownInstanceDisks(self, instance)
11456
      if not r_shut:
11457
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11458
                                 " proceed with disk template conversion")
11459
      mode = (instance.disk_template, self.op.disk_template)
11460
      try:
11461
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11462
      except:
11463
        self.cfg.ReleaseDRBDMinors(instance.name)
11464
        raise
11465
      result.append(("disk_template", self.op.disk_template))
11466

    
11467
    # NIC changes
11468
    for nic_op, nic_dict in self.op.nics:
11469
      if nic_op == constants.DDM_REMOVE:
11470
        # remove the last nic
11471
        del instance.nics[-1]
11472
        result.append(("nic.%d" % len(instance.nics), "remove"))
11473
      elif nic_op == constants.DDM_ADD:
11474
        # mac and bridge should be set by now
11475
        mac = nic_dict[constants.INIC_MAC]
11476
        ip = nic_dict.get(constants.INIC_IP, None)
11477
        nicparams = self.nic_pinst[constants.DDM_ADD]
11478
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11479
        instance.nics.append(new_nic)
11480
        result.append(("nic.%d" % (len(instance.nics) - 1),
11481
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11482
                       (new_nic.mac, new_nic.ip,
11483
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11484
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11485
                       )))
11486
      else:
11487
        for key in (constants.INIC_MAC, constants.INIC_IP):
11488
          if key in nic_dict:
11489
            setattr(instance.nics[nic_op], key, nic_dict[key])
11490
        if nic_op in self.nic_pinst:
11491
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11492
        for key, val in nic_dict.iteritems():
11493
          result.append(("nic.%s/%d" % (key, nic_op), val))
11494

    
11495
    # hvparams changes
11496
    if self.op.hvparams:
11497
      instance.hvparams = self.hv_inst
11498
      for key, val in self.op.hvparams.iteritems():
11499
        result.append(("hv/%s" % key, val))
11500

    
11501
    # beparams changes
11502
    if self.op.beparams:
11503
      instance.beparams = self.be_inst
11504
      for key, val in self.op.beparams.iteritems():
11505
        result.append(("be/%s" % key, val))
11506

    
11507
    # OS change
11508
    if self.op.os_name:
11509
      instance.os = self.op.os_name
11510

    
11511
    # osparams changes
11512
    if self.op.osparams:
11513
      instance.osparams = self.os_inst
11514
      for key, val in self.op.osparams.iteritems():
11515
        result.append(("os/%s" % key, val))
11516

    
11517
    self.cfg.Update(instance, feedback_fn)
11518

    
11519
    return result
11520

    
11521
  _DISK_CONVERSIONS = {
11522
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11523
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11524
    }
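

# LUInstanceSetParams dispatches disk template conversions through the
# _DISK_CONVERSIONS mapping above, keyed by (old, new) template pairs.  The
# function below is only a minimal standalone sketch of that dispatch-table
# pattern; it is not used by the LU and its name is invented here.
def _ExampleDispatchConversion(conversions, old_template, new_template, *args):
  """Looks up and invokes a conversion handler for an (old, new) pair.

  @raise ValueError: if the requested conversion is not supported

  """
  try:
    handler = conversions[(old_template, new_template)]
  except KeyError:
    raise ValueError("Unsupported conversion from %s to %s" %
                     (old_template, new_template))
  return handler(*args)
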
class LUInstanceChangeGroup(LogicalUnit):
11528
  HPATH = "instance-change-group"
11529
  HTYPE = constants.HTYPE_INSTANCE
11530
  REQ_BGL = False
11531

    
11532
  def ExpandNames(self):
11533
    self.share_locks = _ShareAll()
11534
    self.needed_locks = {
11535
      locking.LEVEL_NODEGROUP: [],
11536
      locking.LEVEL_NODE: [],
11537
      }
11538

    
11539
    self._ExpandAndLockInstance()
11540

    
11541
    if self.op.target_groups:
11542
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11543
                                  self.op.target_groups)
11544
    else:
11545
      self.req_target_uuids = None
11546

    
11547
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11548

    
11549
  def DeclareLocks(self, level):
11550
    if level == locking.LEVEL_NODEGROUP:
11551
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11552

    
11553
      if self.req_target_uuids:
11554
        lock_groups = set(self.req_target_uuids)
11555

    
11556
        # Lock all groups used by instance optimistically; this requires going
11557
        # via the node before it's locked, requiring verification later on
11558
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11559
        lock_groups.update(instance_groups)
11560
      else:
11561
        # No target groups, need to lock all of them
11562
        lock_groups = locking.ALL_SET
11563

    
11564
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11565

    
11566
    elif level == locking.LEVEL_NODE:
11567
      if self.req_target_uuids:
11568
        # Lock all nodes used by instances
11569
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11570
        self._LockInstancesNodes()
11571

    
11572
        # Lock all nodes in all potential target groups
11573
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11574
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11575
        member_nodes = [node_name
11576
                        for group in lock_groups
11577
                        for node_name in self.cfg.GetNodeGroup(group).members]
11578
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11579
      else:
11580
        # Lock all nodes as all groups are potential targets
11581
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11582

    
11583
  def CheckPrereq(self):
11584
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11585
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11586
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11587

    
11588
    assert (self.req_target_uuids is None or
11589
            owned_groups.issuperset(self.req_target_uuids))
11590
    assert owned_instances == set([self.op.instance_name])
11591

    
11592
    # Get instance information
11593
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11594

    
11595
    # Check if node groups for locked instance are still correct
11596
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11597
      ("Instance %s's nodes changed while we kept the lock" %
11598
       self.op.instance_name)
11599

    
11600
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11601
                                           owned_groups)
11602

    
11603
    if self.req_target_uuids:
11604
      # User requested specific target groups
11605
      self.target_uuids = self.req_target_uuids
11606
    else:
11607
      # All groups except those used by the instance are potential targets
11608
      self.target_uuids = owned_groups - inst_groups
11609

    
11610
    conflicting_groups = self.target_uuids & inst_groups
11611
    if conflicting_groups:
11612
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11613
                                 " used by the instance '%s'" %
11614
                                 (utils.CommaJoin(conflicting_groups),
11615
                                  self.op.instance_name),
11616
                                 errors.ECODE_INVAL)
11617

    
11618
    if not self.target_uuids:
11619
      raise errors.OpPrereqError("There are no possible target groups",
11620
                                 errors.ECODE_INVAL)
11621

    
11622
  def BuildHooksEnv(self):
11623
    """Build hooks env.
11624

11625
    """
11626
    assert self.target_uuids
11627

    
11628
    env = {
11629
      "TARGET_GROUPS": " ".join(self.target_uuids),
11630
      }
11631

    
11632
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11633

    
11634
    return env
11635

    
11636
  def BuildHooksNodes(self):
11637
    """Build hooks nodes.
11638

11639
    """
11640
    mn = self.cfg.GetMasterNode()
11641
    return ([mn], [mn])
11642

    
11643
  def Exec(self, feedback_fn):
11644
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11645

    
11646
    assert instances == [self.op.instance_name], "Instance not locked"
11647

    
11648
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11649
                     instances=instances, target_groups=list(self.target_uuids))
11650

    
11651
    ial.Run(self.op.iallocator)
11652

    
11653
    if not ial.success:
11654
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11655
                                 " instance '%s' using iallocator '%s': %s" %
11656
                                 (self.op.instance_name, self.op.iallocator,
11657
                                  ial.info),
11658
                                 errors.ECODE_NORES)
11659

    
11660
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11661

    
11662
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11663
                 " instance '%s'", len(jobs), self.op.instance_name)
11664

    
11665
    return ResultWithJobs(jobs)
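

# In LUInstanceChangeGroup above, CheckPrereq selects the target groups with
# plain set arithmetic: the requested groups, or all owned groups minus the
# groups the instance already uses, rejecting overlaps and empty results.
# The function below is an illustrative standalone version of that selection
# only; it is not used by the LU and works on plain sets of group UUIDs.
def _ExampleSelectTargetGroups(owned_groups, inst_groups, requested=None):
  """Returns the set of candidate target groups for an instance.

  @raise ValueError: if the request conflicts with the instance's groups or
      leaves no candidate group

  """
  if requested:
    targets = set(requested)
  else:
    targets = set(owned_groups) - set(inst_groups)
  conflicting = targets & set(inst_groups)
  if conflicting:
    raise ValueError("Group(s) %s are already used by the instance" %
                     ", ".join(sorted(conflicting)))
  if not targets:
    raise ValueError("There are no possible target groups")
  return targets
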
class LUBackupQuery(NoHooksLU):
11669
  """Query the exports list
11670

11671
  """
11672
  REQ_BGL = False
11673

    
11674
  def ExpandNames(self):
11675
    self.needed_locks = {}
11676
    self.share_locks[locking.LEVEL_NODE] = 1
11677
    if not self.op.nodes:
11678
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11679
    else:
11680
      self.needed_locks[locking.LEVEL_NODE] = \
11681
        _GetWantedNodes(self, self.op.nodes)
11682

    
11683
  def Exec(self, feedback_fn):
11684
    """Compute the list of all the exported system images.
11685

11686
    @rtype: dict
11687
    @return: a dictionary with the structure node->(export-list)
11688
        where export-list is a list of the instances exported on
11689
        that node.
11690

11691
    """
11692
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11693
    rpcresult = self.rpc.call_export_list(self.nodes)
11694
    result = {}
11695
    for node in rpcresult:
11696
      if rpcresult[node].fail_msg:
11697
        result[node] = False
11698
      else:
11699
        result[node] = rpcresult[node].payload
11700

    
11701
    return result
11702

    
11703

    
11704
class LUBackupPrepare(NoHooksLU):
11705
  """Prepares an instance for an export and returns useful information.
11706

11707
  """
11708
  REQ_BGL = False
11709

    
11710
  def ExpandNames(self):
11711
    self._ExpandAndLockInstance()
11712

    
11713
  def CheckPrereq(self):
11714
    """Check prerequisites.
11715

11716
    """
11717
    instance_name = self.op.instance_name
11718

    
11719
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11720
    assert self.instance is not None, \
11721
          "Cannot retrieve locked instance %s" % self.op.instance_name
11722
    _CheckNodeOnline(self, self.instance.primary_node)
11723

    
11724
    self._cds = _GetClusterDomainSecret()
11725

    
11726
  def Exec(self, feedback_fn):
11727
    """Prepares an instance for an export.
11728

11729
    """
11730
    instance = self.instance
11731

    
11732
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11733
      salt = utils.GenerateSecret(8)
11734

    
11735
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11736
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11737
                                              constants.RIE_CERT_VALIDITY)
11738
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11739

    
11740
      (name, cert_pem) = result.payload
11741

    
11742
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11743
                                             cert_pem)
11744

    
11745
      return {
11746
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11747
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11748
                          salt),
11749
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11750
        }
11751

    
11752
    return None
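

# LUBackupPrepare above hands out an "x509_key_name" tuple of (name, hmac,
# salt) signed with the cluster domain secret via utils.Sha1Hmac, which
# LUBackupExport later checks with utils.VerifySha1Hmac.  The two functions
# below only sketch the underlying salted-HMAC idea with the standard
# library; the exact message layout is an assumption made for this example
# and they are not used by the LUs.
import hmac as _example_hmac
from hashlib import sha1 as _example_sha1


def _ExampleSignKeyName(secret, name, salt):
  """Returns a hex SHA1-HMAC over a salted name."""
  return _example_hmac.new(secret, "%s %s" % (salt, name),
                           _example_sha1).hexdigest()


def _ExampleVerifyKeyName(secret, name, digest, salt):
  """Checks a digest produced by L{_ExampleSignKeyName}."""
  return _ExampleSignKeyName(secret, name, salt) == digest
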
class LUBackupExport(LogicalUnit):
11756
  """Export an instance to an image in the cluster.
11757

11758
  """
11759
  HPATH = "instance-export"
11760
  HTYPE = constants.HTYPE_INSTANCE
11761
  REQ_BGL = False
11762

    
11763
  def CheckArguments(self):
11764
    """Check the arguments.
11765

11766
    """
11767
    self.x509_key_name = self.op.x509_key_name
11768
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11769

    
11770
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11771
      if not self.x509_key_name:
11772
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11773
                                   errors.ECODE_INVAL)
11774

    
11775
      if not self.dest_x509_ca_pem:
11776
        raise errors.OpPrereqError("Missing destination X509 CA",
11777
                                   errors.ECODE_INVAL)
11778

    
11779
  def ExpandNames(self):
11780
    self._ExpandAndLockInstance()
11781

    
11782
    # Lock all nodes for local exports
11783
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11784
      # FIXME: lock only instance primary and destination node
11785
      #
11786
      # Sad but true, for now we have to lock all nodes, as we don't know where
11787
      # the previous export might be, and in this LU we search for it and
11788
      # remove it from its current node. In the future we could fix this by:
11789
      #  - making a tasklet to search (share-lock all), then create the
11790
      #    new one, then one to remove, after
11791
      #  - removing the removal operation altogether
11792
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11793

    
11794
  def DeclareLocks(self, level):
11795
    """Last minute lock declaration."""
11796
    # All nodes are locked anyway, so nothing to do here.
11797

    
11798
  def BuildHooksEnv(self):
11799
    """Build hooks env.
11800

11801
    This will run on the master, primary node and target node.
11802

11803
    """
11804
    env = {
11805
      "EXPORT_MODE": self.op.mode,
11806
      "EXPORT_NODE": self.op.target_node,
11807
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11808
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11809
      # TODO: Generic function for boolean env variables
11810
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11811
      }
11812

    
11813
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11814

    
11815
    return env
11816

    
11817
  def BuildHooksNodes(self):
11818
    """Build hooks nodes.
11819

11820
    """
11821
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11822

    
11823
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11824
      nl.append(self.op.target_node)
11825

    
11826
    return (nl, nl)
11827

    
11828
  def CheckPrereq(self):
11829
    """Check prerequisites.
11830

11831
    This checks that the instance and node names are valid.
11832

11833
    """
11834
    instance_name = self.op.instance_name
11835

    
11836
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11837
    assert self.instance is not None, \
11838
          "Cannot retrieve locked instance %s" % self.op.instance_name
11839
    _CheckNodeOnline(self, self.instance.primary_node)
11840

    
11841
    if (self.op.remove_instance and self.instance.admin_up and
11842
        not self.op.shutdown):
11843
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11844
                                 " down before")
11845

    
11846
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11847
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11848
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11849
      assert self.dst_node is not None
11850

    
11851
      _CheckNodeOnline(self, self.dst_node.name)
11852
      _CheckNodeNotDrained(self, self.dst_node.name)
11853

    
11854
      self._cds = None
11855
      self.dest_disk_info = None
11856
      self.dest_x509_ca = None
11857

    
11858
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11859
      self.dst_node = None
11860

    
11861
      if len(self.op.target_node) != len(self.instance.disks):
11862
        raise errors.OpPrereqError(("Received destination information for %s"
11863
                                    " disks, but instance %s has %s disks") %
11864
                                   (len(self.op.target_node), instance_name,
11865
                                    len(self.instance.disks)),
11866
                                   errors.ECODE_INVAL)
11867

    
11868
      cds = _GetClusterDomainSecret()
11869

    
11870
      # Check X509 key name
11871
      try:
11872
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11873
      except (TypeError, ValueError), err:
11874
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11875

    
11876
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11877
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11878
                                   errors.ECODE_INVAL)
11879

    
11880
      # Load and verify CA
11881
      try:
11882
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11883
      except OpenSSL.crypto.Error, err:
11884
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11885
                                   (err, ), errors.ECODE_INVAL)
11886

    
11887
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11888
      if errcode is not None:
11889
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11890
                                   (msg, ), errors.ECODE_INVAL)
11891

    
11892
      self.dest_x509_ca = cert
11893

    
11894
      # Verify target information
11895
      disk_info = []
11896
      for idx, disk_data in enumerate(self.op.target_node):
11897
        try:
11898
          (host, port, magic) = \
11899
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11900
        except errors.GenericError, err:
11901
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11902
                                     (idx, err), errors.ECODE_INVAL)
11903

    
11904
        disk_info.append((host, port, magic))
11905

    
11906
      assert len(disk_info) == len(self.op.target_node)
11907
      self.dest_disk_info = disk_info
11908

    
11909
    else:
11910
      raise errors.ProgrammerError("Unhandled export mode %r" %
11911
                                   self.op.mode)
11912

    
11913
    # instance disk type verification
11914
    # TODO: Implement export support for file-based disks
11915
    for disk in self.instance.disks:
11916
      if disk.dev_type == constants.LD_FILE:
11917
        raise errors.OpPrereqError("Export not supported for instances with"
11918
                                   " file-based disks", errors.ECODE_INVAL)
11919

    
11920
  def _CleanupExports(self, feedback_fn):
11921
    """Removes exports of current instance from all other nodes.
11922

11923
    If an instance in a cluster with nodes A..D was exported to node C, its
11924
    exports will be removed from the nodes A, B and D.
11925

11926
    """
11927
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11928

    
11929
    nodelist = self.cfg.GetNodeList()
11930
    nodelist.remove(self.dst_node.name)
11931

    
11932
    # on one-node clusters nodelist will be empty after the removal;
    # if we proceed, the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
11935
    iname = self.instance.name
11936
    if nodelist:
11937
      feedback_fn("Removing old exports for instance %s" % iname)
11938
      exportlist = self.rpc.call_export_list(nodelist)
11939
      for node in exportlist:
11940
        if exportlist[node].fail_msg:
11941
          continue
11942
        if iname in exportlist[node].payload:
11943
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11944
          if msg:
11945
            self.LogWarning("Could not remove older export for instance %s"
11946
                            " on node %s: %s", iname, node, msg)
11947

    
11948
  def Exec(self, feedback_fn):
11949
    """Export an instance to an image in the cluster.
11950

11951
    """
11952
    assert self.op.mode in constants.EXPORT_MODES
11953

    
11954
    instance = self.instance
11955
    src_node = instance.primary_node
11956

    
11957
    if self.op.shutdown:
11958
      # shutdown the instance, but not the disks
11959
      feedback_fn("Shutting down instance %s" % instance.name)
11960
      result = self.rpc.call_instance_shutdown(src_node, instance,
11961
                                               self.op.shutdown_timeout)
11962
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11963
      result.Raise("Could not shutdown instance %s on"
11964
                   " node %s" % (instance.name, src_node))
11965

    
11966
    # set the disks ID correctly since call_instance_start needs the
11967
    # correct drbd minor to create the symlinks
11968
    for disk in instance.disks:
11969
      self.cfg.SetDiskID(disk, src_node)
11970

    
11971
    activate_disks = (not instance.admin_up)
11972

    
11973
    if activate_disks:
11974
      # Activate the instance disks if we're exporting a stopped instance
11975
      feedback_fn("Activating disks for %s" % instance.name)
11976
      _StartInstanceDisks(self, instance, None)
11977

    
11978
    try:
11979
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11980
                                                     instance)
11981

    
11982
      helper.CreateSnapshots()
11983
      try:
11984
        if (self.op.shutdown and instance.admin_up and
11985
            not self.op.remove_instance):
11986
          assert not activate_disks
11987
          feedback_fn("Starting instance %s" % instance.name)
11988
          result = self.rpc.call_instance_start(src_node,
11989
                                                (instance, None, None), False)
11990
          msg = result.fail_msg
11991
          if msg:
11992
            feedback_fn("Failed to start instance: %s" % msg)
11993
            _ShutdownInstanceDisks(self, instance)
11994
            raise errors.OpExecError("Could not start instance: %s" % msg)
11995

    
11996
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11997
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11998
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11999
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
12000
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12001

    
12002
          (key_name, _, _) = self.x509_key_name
12003

    
12004
          dest_ca_pem = \
12005
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12006
                                            self.dest_x509_ca)
12007

    
12008
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12009
                                                     key_name, dest_ca_pem,
12010
                                                     timeouts)
12011
      finally:
12012
        helper.Cleanup()
12013

    
12014
      # Check for backwards compatibility
12015
      assert len(dresults) == len(instance.disks)
12016
      assert compat.all(isinstance(i, bool) for i in dresults), \
12017
             "Not all results are boolean: %r" % dresults
12018

    
12019
    finally:
12020
      if activate_disks:
12021
        feedback_fn("Deactivating disks for %s" % instance.name)
12022
        _ShutdownInstanceDisks(self, instance)
12023

    
12024
    if not (compat.all(dresults) and fin_resu):
12025
      failures = []
12026
      if not fin_resu:
12027
        failures.append("export finalization")
12028
      if not compat.all(dresults):
12029
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12030
                               if not dsk)
12031
        failures.append("disk export: disk(s) %s" % fdsk)
12032

    
12033
      raise errors.OpExecError("Export failed, errors in %s" %
12034
                               utils.CommaJoin(failures))
12035

    
12036
    # At this point, the export was successful, we can cleanup/finish
12037

    
12038
    # Remove instance if requested
12039
    if self.op.remove_instance:
12040
      feedback_fn("Removing instance %s" % instance.name)
12041
      _RemoveInstance(self, feedback_fn, instance,
12042
                      self.op.ignore_remove_failures)
12043

    
12044
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12045
      self._CleanupExports(feedback_fn)
12046

    
12047
    return fin_resu, dresults
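

# The Exec method above folds the finalization flag and the per-disk boolean
# results into a single error message (compat.all is essentially the built-in
# all()).  The helper below is only a standalone sketch of that aggregation;
# it is not used by the LU and its name is invented here.
def _ExampleSummarizeExport(fin_resu, dresults):
  """Returns None on success, otherwise a failure summary string.

  @type fin_resu: boolean
  @param fin_resu: whether export finalization succeeded
  @type dresults: list of booleans
  @param dresults: per-disk export results

  """
  failures = []
  if not fin_resu:
    failures.append("export finalization")
  bad_disks = [str(idx) for (idx, ok) in enumerate(dresults) if not ok]
  if bad_disks:
    failures.append("disk export: disk(s) %s" % ", ".join(bad_disks))
  if failures:
    return "Export failed, errors in %s" % "; ".join(failures)
  return None
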
class LUBackupRemove(NoHooksLU):
12051
  """Remove exports related to the named instance.
12052

12053
  """
12054
  REQ_BGL = False
12055

    
12056
  def ExpandNames(self):
12057
    self.needed_locks = {}
12058
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can also remove exports for an already-removed instance)
12061
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12062

    
12063
  def Exec(self, feedback_fn):
12064
    """Remove any export.
12065

12066
    """
12067
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12068
    # If the instance was not found we'll try with the name that was passed in.
12069
    # This will only work if it was an FQDN, though.
12070
    fqdn_warn = False
12071
    if not instance_name:
12072
      fqdn_warn = True
12073
      instance_name = self.op.instance_name
12074

    
12075
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12076
    exportlist = self.rpc.call_export_list(locked_nodes)
12077
    found = False
12078
    for node in exportlist:
12079
      msg = exportlist[node].fail_msg
12080
      if msg:
12081
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12082
        continue
12083
      if instance_name in exportlist[node].payload:
12084
        found = True
12085
        result = self.rpc.call_export_remove(node, instance_name)
12086
        msg = result.fail_msg
12087
        if msg:
12088
          logging.error("Could not remove export for instance %s"
12089
                        " on node %s: %s", instance_name, node, msg)
12090

    
12091
    if fqdn_warn and not found:
12092
      feedback_fn("Export not found. If trying to remove an export belonging"
12093
                  " to a deleted instance please use its Fully Qualified"
12094
                  " Domain Name.")
12095

    
12096

    
12097
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUGroupAssignNodes(NoHooksLU):
12162
  """Logical unit for assigning nodes to groups.
12163

12164
  """
12165
  REQ_BGL = False
12166

    
12167
  def ExpandNames(self):
12168
    # These raise errors.OpPrereqError on their own:
12169
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12170
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12171

    
12172
    # We want to lock all the affected nodes and groups. We have readily
12173
    # available the list of nodes, and the *destination* group. To gather the
12174
    # list of "source" groups, we need to fetch node information later on.
12175
    self.needed_locks = {
12176
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12177
      locking.LEVEL_NODE: self.op.nodes,
12178
      }
12179

    
12180
  def DeclareLocks(self, level):
12181
    if level == locking.LEVEL_NODEGROUP:
12182
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12183

    
12184
      # Try to get all affected nodes' groups without having the group or node
12185
      # lock yet. Needs verification later in the code flow.
12186
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12187

    
12188
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12189

    
12190
  def CheckPrereq(self):
12191
    """Check prerequisites.
12192

12193
    """
12194
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12195
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12196
            frozenset(self.op.nodes))
12197

    
12198
    expected_locks = (set([self.group_uuid]) |
12199
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12200
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12201
    if actual_locks != expected_locks:
12202
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12203
                               " current groups are '%s', used to be '%s'" %
12204
                               (utils.CommaJoin(expected_locks),
12205
                                utils.CommaJoin(actual_locks)))
12206

    
12207
    self.node_data = self.cfg.GetAllNodesInfo()
12208
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12209
    instance_data = self.cfg.GetAllInstancesInfo()
12210

    
12211
    if self.group is None:
12212
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12213
                               (self.op.group_name, self.group_uuid))
12214

    
12215
    (new_splits, previous_splits) = \
12216
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12217
                                             for node in self.op.nodes],
12218
                                            self.node_data, instance_data)
12219

    
12220
    if new_splits:
12221
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12222

    
12223
      if not self.op.force:
12224
        raise errors.OpExecError("The following instances get split by this"
12225
                                 " change and --force was not given: %s" %
12226
                                 fmt_new_splits)
12227
      else:
12228
        self.LogWarning("This operation will split the following instances: %s",
12229
                        fmt_new_splits)
12230

    
12231
        if previous_splits:
12232
          self.LogWarning("In addition, these already-split instances continue"
12233
                          " to be split across groups: %s",
12234
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12235

    
12236
  def Exec(self, feedback_fn):
12237
    """Assign nodes to a new group.
12238

12239
    """
12240
    for node in self.op.nodes:
12241
      self.node_data[node].group = self.group_uuid
12242

    
12243
    # FIXME: Depends on side-effects of modifying the result of
12244
    # C{cfg.GetAllNodesInfo}
12245

    
12246
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12247

    
12248
  @staticmethod
12249
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12250
    """Check for split instances after a node assignment.
12251

12252
    This method considers a series of node assignments as an atomic operation,
12253
    and returns information about split instances after applying the set of
12254
    changes.
12255

12256
    In particular, it returns information about newly split instances, and
12257
    instances that were already split, and remain so after the change.
12258

12259
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12260
    considered.
12261

12262
    @type changes: list of (node_name, new_group_uuid) pairs.
12263
    @param changes: list of node assignments to consider.
12264
    @param node_data: a dict with data for all nodes
12265
    @param instance_data: a dict with all instances to consider
12266
    @rtype: a two-tuple
12267
    @return: a list of instances that were previously okay but become split as
      a consequence of this change, and a list of instances that were already
      split and that this change does not fix.
12270

12271
    """
12272
    changed_nodes = dict((node, group) for node, group in changes
12273
                         if node_data[node].group != group)
12274

    
12275
    all_split_instances = set()
12276
    previously_split_instances = set()
12277

    
12278
    def InstanceNodes(instance):
12279
      return [instance.primary_node] + list(instance.secondary_nodes)
12280

    
12281
    for inst in instance_data.values():
12282
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12283
        continue
12284

    
12285
      instance_nodes = InstanceNodes(inst)
12286

    
12287
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12288
        previously_split_instances.add(inst.name)
12289

    
12290
      if len(set(changed_nodes.get(node, node_data[node].group)
12291
                 for node in instance_nodes)) > 1:
12292
        all_split_instances.add(inst.name)
12293

    
12294
    return (list(all_split_instances - previously_split_instances),
12295
            list(previously_split_instances & all_split_instances))
12296

    
12297

    
12298
class _GroupQuery(_QueryBase):
12299
  FIELDS = query.GROUP_FIELDS
12300

    
12301
  def ExpandNames(self, lu):
12302
    lu.needed_locks = {}
12303

    
12304
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12305
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12306

    
12307
    if not self.names:
12308
      self.wanted = [name_to_uuid[name]
12309
                     for name in utils.NiceSort(name_to_uuid.keys())]
12310
    else:
12311
      # Accept names to be either names or UUIDs.
12312
      missing = []
12313
      self.wanted = []
12314
      all_uuid = frozenset(self._all_groups.keys())
12315

    
12316
      for name in self.names:
12317
        if name in all_uuid:
12318
          self.wanted.append(name)
12319
        elif name in name_to_uuid:
12320
          self.wanted.append(name_to_uuid[name])
12321
        else:
12322
          missing.append(name)
12323

    
12324
      if missing:
12325
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12326
                                   utils.CommaJoin(missing),
12327
                                   errors.ECODE_NOENT)
12328

    
12329
  def DeclareLocks(self, lu, level):
12330
    pass
12331

    
12332
  def _GetQueryData(self, lu):
12333
    """Computes the list of node groups and their attributes.
12334

12335
    """
12336
    do_nodes = query.GQ_NODE in self.requested_data
12337
    do_instances = query.GQ_INST in self.requested_data
12338

    
12339
    group_to_nodes = None
12340
    group_to_instances = None
12341

    
12342
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12343
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12344
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12345
    # instance->node. Hence, we will need to process nodes even if we only need
12346
    # instance information.
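    # For example (hypothetical data): with node "node1" in group "g-uuid-1"
    # hosting instance "inst1", the code below builds
    # group_to_nodes = {"g-uuid-1": ["node1"]} and
    # group_to_instances = {"g-uuid-1": ["inst1"]}.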
12347
    if do_nodes or do_instances:
12348
      all_nodes = lu.cfg.GetAllNodesInfo()
12349
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12350
      node_to_group = {}
12351

    
12352
      for node in all_nodes.values():
12353
        if node.group in group_to_nodes:
12354
          group_to_nodes[node.group].append(node.name)
12355
          node_to_group[node.name] = node.group
12356

    
12357
      if do_instances:
12358
        all_instances = lu.cfg.GetAllInstancesInfo()
12359
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12360

    
12361
        for instance in all_instances.values():
12362
          node = instance.primary_node
12363
          if node in node_to_group:
12364
            group_to_instances[node_to_group[node]].append(instance.name)
12365

    
12366
        if not do_nodes:
12367
          # Do not pass on node information if it was not requested.
12368
          group_to_nodes = None
12369

    
12370
    return query.GroupQueryData([self._all_groups[uuid]
12371
                                 for uuid in self.wanted],
12372
                                group_to_nodes, group_to_instances)
12373

    
12374

    
12375
class LUGroupQuery(NoHooksLU):
12376
  """Logical unit for querying node groups.
12377

12378
  """
12379
  REQ_BGL = False
12380

    
12381
  def CheckArguments(self):
12382
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12383
                          self.op.output_fields, False)
12384

    
12385
  def ExpandNames(self):
12386
    self.gq.ExpandNames(self)
12387

    
12388
  def DeclareLocks(self, level):
12389
    self.gq.DeclareLocks(self, level)
12390

    
12391
  def Exec(self, feedback_fn):
12392
    return self.gq.OldStyleQuery(self)
12393

    
12394

    
12395
class LUGroupSetParams(LogicalUnit):
12396
  """Modifies the parameters of a node group.
12397

12398
  """
12399
  HPATH = "group-modify"
12400
  HTYPE = constants.HTYPE_GROUP
12401
  REQ_BGL = False
12402

    
12403
  def CheckArguments(self):
12404
    all_changes = [
12405
      self.op.ndparams,
12406
      self.op.alloc_policy,
12407
      ]
12408

    
12409
    if all_changes.count(None) == len(all_changes):
12410
      raise errors.OpPrereqError("Please pass at least one modification",
12411
                                 errors.ECODE_INVAL)
12412

    
12413
  def ExpandNames(self):
12414
    # This raises errors.OpPrereqError on its own:
12415
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12416

    
12417
    self.needed_locks = {
12418
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12419
      }
12420

    
12421
  def CheckPrereq(self):
12422
    """Check prerequisites.
12423

12424
    """
12425
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12426

    
12427
    if self.group is None:
12428
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12429
                               (self.op.group_name, self.group_uuid))
12430

    
12431
    if self.op.ndparams:
12432
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12433
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12434
      self.new_ndparams = new_ndparams
12435

    
12436
  def BuildHooksEnv(self):
12437
    """Build hooks env.
12438

12439
    """
12440
    return {
12441
      "GROUP_NAME": self.op.group_name,
12442
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12443
      }
12444

    
12445
  def BuildHooksNodes(self):
12446
    """Build hooks nodes.
12447

12448
    """
12449
    mn = self.cfg.GetMasterNode()
12450
    return ([mn], [mn])
12451

    
12452
  def Exec(self, feedback_fn):
12453
    """Modifies the node group.
12454

12455
    """
12456
    result = []
12457

    
12458
    if self.op.ndparams:
12459
      self.group.ndparams = self.new_ndparams
12460
      result.append(("ndparams", str(self.group.ndparams)))
12461

    
12462
    if self.op.alloc_policy:
12463
      self.group.alloc_policy = self.op.alloc_policy
12464

    
12465
    self.cfg.Update(self.group, feedback_fn)
12466
    return result
12467

    
12468

    
12469
class LUGroupRemove(LogicalUnit):
12470
  HPATH = "group-remove"
12471
  HTYPE = constants.HTYPE_GROUP
12472
  REQ_BGL = False
12473

    
12474
  def ExpandNames(self):
12475
    # This raises errors.OpPrereqError on its own:
12476
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12477
    self.needed_locks = {
12478
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12479
      }
12480

    
12481
  def CheckPrereq(self):
12482
    """Check prerequisites.
12483

12484
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
12487

12488
    """
12489
    # Verify that the group is empty.
12490
    group_nodes = [node.name
12491
                   for node in self.cfg.GetAllNodesInfo().values()
12492
                   if node.group == self.group_uuid]
12493

    
12494
    if group_nodes:
12495
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12496
                                 " nodes: %s" %
12497
                                 (self.op.group_name,
12498
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12499
                                 errors.ECODE_STATE)
12500

    
12501
    # Verify the cluster would not be left group-less.
12502
    if len(self.cfg.GetNodeGroupList()) == 1:
12503
      raise errors.OpPrereqError("Group '%s' is the only group,"
12504
                                 " cannot be removed" %
12505
                                 self.op.group_name,
12506
                                 errors.ECODE_STATE)
12507

    
12508
  def BuildHooksEnv(self):
12509
    """Build hooks env.
12510

12511
    """
12512
    return {
12513
      "GROUP_NAME": self.op.group_name,
12514
      }
12515

    
12516
  def BuildHooksNodes(self):
12517
    """Build hooks nodes.
12518

12519
    """
12520
    mn = self.cfg.GetMasterNode()
12521
    return ([mn], [mn])
12522

    
12523
  def Exec(self, feedback_fn):
12524
    """Remove the node group.
12525

12526
    """
12527
    try:
12528
      self.cfg.RemoveNodeGroup(self.group_uuid)
12529
    except errors.ConfigurationError:
12530
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12531
                               (self.op.group_name, self.group_uuid))
12532

    
12533
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12534

    
12535

    
12536
class LUGroupRename(LogicalUnit):
12537
  HPATH = "group-rename"
12538
  HTYPE = constants.HTYPE_GROUP
12539
  REQ_BGL = False
12540

    
12541
  def ExpandNames(self):
12542
    # This raises errors.OpPrereqError on its own:
12543
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12544

    
12545
    self.needed_locks = {
12546
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12547
      }
12548

    
12549
  def CheckPrereq(self):
12550
    """Check prerequisites.
12551

12552
    Ensures requested new name is not yet used.
12553

12554
    """
12555
    try:
12556
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12557
    except errors.OpPrereqError:
12558
      pass
12559
    else:
12560
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12561
                                 " node group (UUID: %s)" %
12562
                                 (self.op.new_name, new_name_uuid),
12563
                                 errors.ECODE_EXISTS)
12564

    
12565
  def BuildHooksEnv(self):
12566
    """Build hooks env.
12567

12568
    """
12569
    return {
12570
      "OLD_NAME": self.op.group_name,
12571
      "NEW_NAME": self.op.new_name,
12572
      }
12573

    
12574
  def BuildHooksNodes(self):
12575
    """Build hooks nodes.
12576

12577
    """
12578
    mn = self.cfg.GetMasterNode()
12579

    
12580
    all_nodes = self.cfg.GetAllNodesInfo()
12581
    all_nodes.pop(mn, None)
12582

    
12583
    run_nodes = [mn]
12584
    run_nodes.extend(node.name for node in all_nodes.values()
12585
                     if node.group == self.group_uuid)
12586

    
12587
    return (run_nodes, run_nodes)
12588

    
12589
  def Exec(self, feedback_fn):
12590
    """Rename the node group.
12591

12592
    """
12593
    group = self.cfg.GetNodeGroup(self.group_uuid)
12594

    
12595
    if group is None:
12596
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12597
                               (self.op.group_name, self.group_uuid))
12598

    
12599
    group.name = self.op.new_name
12600
    self.cfg.Update(group, feedback_fn)
12601

    
12602
    return self.op.new_name
12603

    
12604

    
12605
class LUGroupEvacuate(LogicalUnit):
12606
  HPATH = "group-evacuate"
12607
  HTYPE = constants.HTYPE_GROUP
12608
  REQ_BGL = False
12609

    
12610
  def ExpandNames(self):
12611
    # This raises errors.OpPrereqError on its own:
12612
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12613

    
12614
    if self.op.target_groups:
12615
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12616
                                  self.op.target_groups)
12617
    else:
12618
      self.req_target_uuids = []
12619

    
12620
    if self.group_uuid in self.req_target_uuids:
12621
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12622
                                 " as a target group (targets are %s)" %
12623
                                 (self.group_uuid,
12624
                                  utils.CommaJoin(self.req_target_uuids)),
12625
                                 errors.ECODE_INVAL)
12626

    
12627
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12628

    
12629
    self.share_locks = _ShareAll()
12630
    self.needed_locks = {
12631
      locking.LEVEL_INSTANCE: [],
12632
      locking.LEVEL_NODEGROUP: [],
12633
      locking.LEVEL_NODE: [],
12634
      }
12635

    
12636
  def DeclareLocks(self, level):
12637
    if level == locking.LEVEL_INSTANCE:
12638
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12639

    
12640
      # Lock instances optimistically, needs verification once node and group
12641
      # locks have been acquired
12642
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12643
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12644

    
12645
    elif level == locking.LEVEL_NODEGROUP:
12646
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12647

    
12648
      if self.req_target_uuids:
12649
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12650

    
12651
        # Lock all groups used by instances optimistically; this requires going
12652
        # via the node before it's locked, requiring verification later on
12653
        lock_groups.update(group_uuid
12654
                           for instance_name in
12655
                             self.owned_locks(locking.LEVEL_INSTANCE)
12656
                           for group_uuid in
12657
                             self.cfg.GetInstanceNodeGroups(instance_name))
12658
      else:
12659
        # No target groups, need to lock all of them
12660
        lock_groups = locking.ALL_SET
12661

    
12662
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12663

    
12664
    elif level == locking.LEVEL_NODE:
12665
      # This will only lock the nodes in the group to be evacuated which
12666
      # contain actual instances
12667
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12668
      self._LockInstancesNodes()
12669

    
12670
      # Lock all nodes in group to be evacuated and target groups
12671
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12672
      assert self.group_uuid in owned_groups
12673
      member_nodes = [node_name
12674
                      for group in owned_groups
12675
                      for node_name in self.cfg.GetNodeGroup(group).members]
12676
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12677

    
12678
  def CheckPrereq(self):
12679
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12680
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12681
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12682

    
12683
    assert owned_groups.issuperset(self.req_target_uuids)
12684
    assert self.group_uuid in owned_groups
12685

    
12686
    # Check if locked instances are still correct
12687
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12688

    
12689
    # Get instance information
12690
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12691

    
12692
    # Check if node groups for locked instances are still correct
12693
    for instance_name in owned_instances:
12694
      inst = self.instances[instance_name]
12695
      assert owned_nodes.issuperset(inst.all_nodes), \
12696
        "Instance %s's nodes changed while we kept the lock" % instance_name
12697

    
12698
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12699
                                             owned_groups)
12700

    
12701
      assert self.group_uuid in inst_groups, \
12702
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12703

    
12704
    if self.req_target_uuids:
12705
      # User requested specific target groups
12706
      self.target_uuids = self.req_target_uuids
12707
    else:
12708
      # All groups except the one to be evacuated are potential targets
12709
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12710
                           if group_uuid != self.group_uuid]
12711

    
12712
      if not self.target_uuids:
12713
        raise errors.OpPrereqError("There are no possible target groups",
12714
                                   errors.ECODE_INVAL)
12715

    
12716
  def BuildHooksEnv(self):
12717
    """Build hooks env.
12718

12719
    """
12720
    return {
12721
      "GROUP_NAME": self.op.group_name,
12722
      "TARGET_GROUPS": " ".join(self.target_uuids),
12723
      }
12724

    
12725
  def BuildHooksNodes(self):
12726
    """Build hooks nodes.
12727

12728
    """
12729
    mn = self.cfg.GetMasterNode()
12730

    
12731
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12732

    
12733
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12734

    
12735
    return (run_nodes, run_nodes)
12736

    
12737
  def Exec(self, feedback_fn):
12738
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12739

    
12740
    assert self.group_uuid not in self.target_uuids
12741

    
12742
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12743
                     instances=instances, target_groups=self.target_uuids)
12744

    
12745
    ial.Run(self.op.iallocator)
12746

    
12747
    if not ial.success:
12748
      raise errors.OpPrereqError("Can't compute group evacuation using"
12749
                                 " iallocator '%s': %s" %
12750
                                 (self.op.iallocator, ial.info),
12751
                                 errors.ECODE_NORES)
12752

    
12753
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12754

    
12755
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12756
                 len(jobs), self.op.group_name)
12757

    
12758
    return ResultWithJobs(jobs)
12759

    
12760

    
12761
class TagsLU(NoHooksLU): # pylint: disable=W0223
12762
  """Generic tags LU.
12763

12764
  This is an abstract class which is the parent of all the other tags LUs.
12765

12766
  """
12767
  def ExpandNames(self):
12768
    self.group_uuid = None
12769
    self.needed_locks = {}
12770
    if self.op.kind == constants.TAG_NODE:
12771
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12772
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12773
    elif self.op.kind == constants.TAG_INSTANCE:
12774
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12775
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12776
    elif self.op.kind == constants.TAG_NODEGROUP:
12777
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12778

    
12779
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12780
    # not possible to acquire the BGL based on opcode parameters)
12781

    
12782
  def CheckPrereq(self):
12783
    """Check prerequisites.
12784

12785
    """
12786
    if self.op.kind == constants.TAG_CLUSTER:
12787
      self.target = self.cfg.GetClusterInfo()
12788
    elif self.op.kind == constants.TAG_NODE:
12789
      self.target = self.cfg.GetNodeInfo(self.op.name)
12790
    elif self.op.kind == constants.TAG_INSTANCE:
12791
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12792
    elif self.op.kind == constants.TAG_NODEGROUP:
12793
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12794
    else:
12795
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12796
                                 str(self.op.kind), errors.ECODE_INVAL)
12797

    
12798

    
12799
class LUTagsGet(TagsLU):
12800
  """Returns the tags of a given object.
12801

12802
  """
12803
  REQ_BGL = False
12804

    
12805
  def ExpandNames(self):
12806
    TagsLU.ExpandNames(self)
12807

    
12808
    # Share locks as this is only a read operation
12809
    self.share_locks = _ShareAll()
12810

    
12811
  def Exec(self, feedback_fn):
12812
    """Returns the tag list.
12813

12814
    """
12815
    return list(self.target.GetTags())
12816

    
12817

    
12818
class LUTagsSearch(NoHooksLU):
12819
  """Searches the tags for a given pattern.
12820

12821
  """
12822
  REQ_BGL = False
12823

    
12824
  def ExpandNames(self):
12825
    self.needed_locks = {}
12826

    
12827
  def CheckPrereq(self):
12828
    """Check prerequisites.
12829

12830
    This checks the pattern passed for validity by compiling it.
12831

12832
    """
12833
    try:
12834
      self.re = re.compile(self.op.pattern)
12835
    except re.error, err:
12836
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12837
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12838

    
12839
  def Exec(self, feedback_fn):
12840
    """Returns the tag list.
12841

12842
    """
12843
    cfg = self.cfg
12844
    tgts = [("/cluster", cfg.GetClusterInfo())]
12845
    ilist = cfg.GetAllInstancesInfo().values()
12846
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12847
    nlist = cfg.GetAllNodesInfo().values()
12848
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12849
    tgts.extend(("/nodegroup/%s" % n.name, n)
12850
                for n in cfg.GetAllNodeGroupsInfo().values())
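    # Matches are returned as (path, tag) pairs, for example (hypothetical):
    # [("/instances/web1.example.com", "env:prod")].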
12851
    results = []
12852
    for path, target in tgts:
12853
      for tag in target.GetTags():
12854
        if self.re.search(tag):
12855
          results.append((path, tag))
12856
    return results
12857

    
12858

    
12859
class LUTagsSet(TagsLU):
12860
  """Sets a tag on a given object.
12861

12862
  """
12863
  REQ_BGL = False
12864

    
12865
  def CheckPrereq(self):
12866
    """Check prerequisites.
12867

12868
    This checks the type and length of the tag name and value.
12869

12870
    """
12871
    TagsLU.CheckPrereq(self)
12872
    for tag in self.op.tags:
12873
      objects.TaggableObject.ValidateTag(tag)
12874

    
12875
  def Exec(self, feedback_fn):
12876
    """Sets the tag.
12877

12878
    """
12879
    try:
12880
      for tag in self.op.tags:
12881
        self.target.AddTag(tag)
12882
    except errors.TagError, err:
12883
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
12884
    self.cfg.Update(self.target, feedback_fn)
12885

    
12886

    
12887
class LUTagsDel(TagsLU):
12888
  """Delete a list of tags from a given object.
12889

12890
  """
12891
  REQ_BGL = False
12892

    
12893
  def CheckPrereq(self):
12894
    """Check prerequisites.
12895

12896
    This checks that we have the given tag.
12897

12898
    """
12899
    TagsLU.CheckPrereq(self)
12900
    for tag in self.op.tags:
12901
      objects.TaggableObject.ValidateTag(tag)
12902
    del_tags = frozenset(self.op.tags)
12903
    cur_tags = self.target.GetTags()
12904

    
12905
    diff_tags = del_tags - cur_tags
12906
    if diff_tags:
12907
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
12908
      raise errors.OpPrereqError("Tag(s) %s not found" %
12909
                                 (utils.CommaJoin(diff_names), ),
12910
                                 errors.ECODE_NOENT)
12911

    
12912
  def Exec(self, feedback_fn):
12913
    """Remove the tag from the object.
12914

12915
    """
12916
    for tag in self.op.tags:
12917
      self.target.RemoveTag(tag)
12918
    self.cfg.Update(self.target, feedback_fn)
12919

    
12920

    
12921
class LUTestDelay(NoHooksLU):
12922
  """Sleep for a specified amount of time.
12923

12924
  This LU sleeps on the master and/or nodes for a specified amount of
12925
  time.
12926

12927
  """
12928
  REQ_BGL = False
12929

    
12930
  def ExpandNames(self):
12931
    """Expand names and set required locks.
12932

12933
    This expands the node list, if any.
12934

12935
    """
12936
    self.needed_locks = {}
12937
    if self.op.on_nodes:
12938
      # _GetWantedNodes can be used here, but is not always appropriate to use
12939
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12940
      # more information.
12941
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12942
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12943

    
12944
  def _TestDelay(self):
12945
    """Do the actual sleep.
12946

12947
    """
12948
    if self.op.on_master:
12949
      if not utils.TestDelay(self.op.duration):
12950
        raise errors.OpExecError("Error during master delay test")
12951
    if self.op.on_nodes:
12952
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12953
      for node, node_result in result.items():
12954
        node_result.Raise("Failure during rpc call to node %s" % node)
12955

    
12956
  def Exec(self, feedback_fn):
12957
    """Execute the test delay opcode, with the wanted repetitions.
12958

12959
    """
12960
    if self.op.repeat == 0:
12961
      self._TestDelay()
12962
    else:
12963
      top_value = self.op.repeat - 1
12964
      for i in range(self.op.repeat):
12965
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12966
        self._TestDelay()
12967

    
12968

    
12969
class LUTestJqueue(NoHooksLU):
12970
  """Utility LU to test some aspects of the job queue.
12971

12972
  """
12973
  REQ_BGL = False
12974

    
12975
  # Must be lower than default timeout for WaitForJobChange to see whether it
12976
  # notices changed jobs
12977
  _CLIENT_CONNECT_TIMEOUT = 20.0
12978
  _CLIENT_CONFIRM_TIMEOUT = 60.0
12979

    
12980
  @classmethod
12981
  def _NotifyUsingSocket(cls, cb, errcls):
12982
    """Opens a Unix socket and waits for another program to connect.
12983

12984
    @type cb: callable
12985
    @param cb: Callback to send socket name to client
12986
    @type errcls: class
12987
    @param errcls: Exception class to use for errors
12988

12989
    """
12990
    # Using a temporary directory as there's no easy way to create temporary
12991
    # sockets without writing a custom loop around tempfile.mktemp and
12992
    # socket.bind
12993
    tmpdir = tempfile.mkdtemp()
12994
    try:
12995
      tmpsock = utils.PathJoin(tmpdir, "sock")
12996

    
12997
      logging.debug("Creating temporary socket at %s", tmpsock)
12998
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12999
      try:
13000
        sock.bind(tmpsock)
13001
        sock.listen(1)
13002

    
13003
        # Send details to client
13004
        cb(tmpsock)
13005

    
13006
        # Wait for client to connect before continuing
13007
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13008
        try:
13009
          (conn, _) = sock.accept()
13010
        except socket.error, err:
13011
          raise errcls("Client didn't connect in time (%s)" % err)
13012
      finally:
13013
        sock.close()
13014
    finally:
13015
      # Remove as soon as client is connected
13016
      shutil.rmtree(tmpdir)
13017

    
13018
    # Wait for client to close
13019
    try:
13020
      try:
13021
        # pylint: disable=E1101
13022
        # Instance of '_socketobject' has no ... member
13023
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13024
        conn.recv(1)
13025
      except socket.error, err:
13026
        raise errcls("Client failed to confirm notification (%s)" % err)
13027
    finally:
13028
      conn.close()
13029

    
13030
  def _SendNotification(self, test, arg, sockname):
13031
    """Sends a notification to the client.
13032

13033
    @type test: string
13034
    @param test: Test name
13035
    @param arg: Test argument (depends on test)
13036
    @type sockname: string
13037
    @param sockname: Socket path
13038

13039
    """
13040
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13041

    
13042
  def _Notify(self, prereq, test, arg):
13043
    """Notifies the client of a test.
13044

13045
    @type prereq: bool
13046
    @param prereq: Whether this is a prereq-phase test
13047
    @type test: string
13048
    @param test: Test name
13049
    @param arg: Test argument (depends on test)
13050

13051
    """
13052
    if prereq:
13053
      errcls = errors.OpPrereqError
13054
    else:
13055
      errcls = errors.OpExecError
13056

    
13057
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13058
                                                  test, arg),
13059
                                   errcls)
13060

    
13061
  def CheckArguments(self):
13062
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13063
    self.expandnames_calls = 0
13064

    
13065
  def ExpandNames(self):
13066
    checkargs_calls = getattr(self, "checkargs_calls", 0)
13067
    if checkargs_calls < 1:
13068
      raise errors.ProgrammerError("CheckArguments was not called")
13069

    
13070
    self.expandnames_calls += 1
13071

    
13072
    if self.op.notify_waitlock:
13073
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
13074

    
13075
    self.LogInfo("Expanding names")
13076

    
13077
    # Get lock on master node (just to get a lock, not for a particular reason)
13078
    self.needed_locks = {
13079
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13080
      }
13081

    
13082
  def Exec(self, feedback_fn):
13083
    if self.expandnames_calls < 1:
13084
      raise errors.ProgrammerError("ExpandNames was not called")
13085

    
13086
    if self.op.notify_exec:
13087
      self._Notify(False, constants.JQT_EXEC, None)
13088

    
13089
    self.LogInfo("Executing")
13090

    
13091
    if self.op.log_messages:
13092
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13093
      for idx, msg in enumerate(self.op.log_messages):
13094
        self.LogInfo("Sending log message %s", idx + 1)
13095
        feedback_fn(constants.JQT_MSGPREFIX + msg)
13096
        # Report how many test messages have been sent
13097
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13098

    
13099
    if self.op.fail:
13100
      raise errors.OpExecError("Opcode failure was requested")
13101

    
13102
    return True
13103

    
13104

    
13105
class IAllocator(object):
13106
  """IAllocator framework.
13107

13108
  An IAllocator instance has four sets of attributes:
13109
    - cfg that is needed to query the cluster
13110
    - input data (all members of the _KEYS class attribute are required)
13111
    - four buffer attributes (in|out_data|text), that represent the
13112
      input (to the external script) in text and data structure format,
13113
      and the output from it, again in two formats
13114
    - the result variables from the script (success, info, nodes) for
13115
      easy usage
13116

13117
  """
13118
  # pylint: disable=R0902
13119
  # lots of instance attributes
13120

    
13121
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
13122
    self.cfg = cfg
13123
    self.rpc = rpc_runner
13124
    # init buffer variables
13125
    self.in_text = self.out_text = self.in_data = self.out_data = None
13126
    # init all input fields so that pylint is happy
13127
    self.mode = mode
13128
    self.memory = self.disks = self.disk_template = None
13129
    self.os = self.tags = self.nics = self.vcpus = None
13130
    self.hypervisor = None
13131
    self.relocate_from = None
13132
    self.name = None
13133
    self.instances = None
13134
    self.evac_mode = None
13135
    self.target_groups = []
13136
    # computed fields
13137
    self.required_nodes = None
13138
    # init result fields
13139
    self.success = self.info = self.result = None
13140

    
13141
    try:
13142
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13143
    except KeyError:
13144
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13145
                                   " IAllocator" % self.mode)
13146

    
13147
    keyset = [n for (n, _) in keydata]
13148

    
13149
    for key in kwargs:
13150
      if key not in keyset:
13151
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
13152
                                     " IAllocator" % key)
13153
      setattr(self, key, kwargs[key])
13154

    
13155
    for key in keyset:
13156
      if key not in kwargs:
13157
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13158
                                     " IAllocator" % key)
13159
    self._BuildInputData(compat.partial(fn, self), keydata)
13160

    
13161
  def _ComputeClusterData(self):
13162
    """Compute the generic allocator input data.
13163

13164
    This is the data that is independent of the actual operation.
13165

13166
    """
13167
    cfg = self.cfg
13168
    cluster_info = cfg.GetClusterInfo()
13169
    # cluster data
13170
    data = {
13171
      "version": constants.IALLOCATOR_VERSION,
13172
      "cluster_name": cfg.GetClusterName(),
13173
      "cluster_tags": list(cluster_info.GetTags()),
13174
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13175
      # we don't have job IDs
13176
      }
13177
    ninfo = cfg.GetAllNodesInfo()
13178
    iinfo = cfg.GetAllInstancesInfo().values()
13179
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13180

    
13181
    # node data
13182
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
13183

    
13184
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13185
      hypervisor_name = self.hypervisor
13186
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13187
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13188
    else:
13189
      hypervisor_name = cluster_info.enabled_hypervisors[0]
13190

    
13191
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13192
                                        hypervisor_name)
13193
    node_iinfo = \
13194
      self.rpc.call_all_instances_info(node_list,
13195
                                       cluster_info.enabled_hypervisors)
13196

    
13197
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13198

    
13199
    config_ndata = self._ComputeBasicNodeData(ninfo)
13200
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13201
                                                 i_list, config_ndata)
13202
    assert len(data["nodes"]) == len(ninfo), \
13203
        "Incomplete node data computed"
13204

    
13205
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13206

    
13207
    self.in_data = data
13208

    
13209
  @staticmethod
13210
  def _ComputeNodeGroupData(cfg):
13211
    """Compute node groups data.
13212

13213
    """
13214
    ng = dict((guuid, {
13215
      "name": gdata.name,
13216
      "alloc_policy": gdata.alloc_policy,
13217
      })
13218
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
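    # The result maps group UUID to a small dict, e.g. (hypothetical):
    # {"<group-uuid>": {"name": "default", "alloc_policy": "preferred"}}.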
13219

    
13220
    return ng
13221

    
13222
  @staticmethod
13223
  def _ComputeBasicNodeData(node_cfg):
13224
    """Compute global node data.
13225

13226
    @rtype: dict
13227
    @return: a dict mapping node name to a dict of static node attributes
13228

13229
    """
13230
    # fill in static (config-based) values
13231
    node_results = dict((ninfo.name, {
13232
      "tags": list(ninfo.GetTags()),
13233
      "primary_ip": ninfo.primary_ip,
13234
      "secondary_ip": ninfo.secondary_ip,
13235
      "offline": ninfo.offline,
13236
      "drained": ninfo.drained,
13237
      "master_candidate": ninfo.master_candidate,
13238
      "group": ninfo.group,
13239
      "master_capable": ninfo.master_capable,
13240
      "vm_capable": ninfo.vm_capable,
13241
      })
13242
      for ninfo in node_cfg.values())
13243

    
13244
    return node_results
13245

    
13246
  @staticmethod
13247
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13248
                              node_results):
13249
    """Compute global node data.
13250

13251
    @param node_results: the basic node structures as filled from the config
13252

13253
    """
13254
    # make a copy of the current dict
13255
    node_results = dict(node_results)
13256
    for nname, nresult in node_data.items():
13257
      assert nname in node_results, "Missing basic data for node %s" % nname
13258
      ninfo = node_cfg[nname]
13259

    
13260
      if not (ninfo.offline or ninfo.drained):
13261
        nresult.Raise("Can't get data for node %s" % nname)
13262
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13263
                                nname)
13264
        remote_info = nresult.payload
13265

    
13266
        for attr in ["memory_total", "memory_free", "memory_dom0",
13267
                     "vg_size", "vg_free", "cpu_total"]:
13268
          if attr not in remote_info:
13269
            raise errors.OpExecError("Node '%s' didn't return attribute"
13270
                                     " '%s'" % (nname, attr))
13271
          if not isinstance(remote_info[attr], int):
13272
            raise errors.OpExecError("Node '%s' returned invalid value"
13273
                                     " for '%s': %s" %
13274
                                     (nname, attr, remote_info[attr]))
13275
        # compute memory used by primary instances
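        # (Illustration with made-up numbers: an instance configured with
        # BE_MEMORY=1024 MB but currently reporting 512 MB of use contributes
        # a 512 MB difference, which is subtracted from the node's free
        # memory so the committed amount stays reserved.)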
13276
        i_p_mem = i_p_up_mem = 0
13277
        for iinfo, beinfo in i_list:
13278
          if iinfo.primary_node == nname:
13279
            i_p_mem += beinfo[constants.BE_MEMORY]
13280
            if iinfo.name not in node_iinfo[nname].payload:
13281
              i_used_mem = 0
13282
            else:
13283
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13284
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13285
            remote_info["memory_free"] -= max(0, i_mem_diff)
13286

    
13287
            if iinfo.admin_up:
13288
              i_p_up_mem += beinfo[constants.BE_MEMORY]
13289

    
13290
        # compute memory used by instances
13291
        pnr_dyn = {
13292
          "total_memory": remote_info["memory_total"],
13293
          "reserved_memory": remote_info["memory_dom0"],
13294
          "free_memory": remote_info["memory_free"],
13295
          "total_disk": remote_info["vg_size"],
13296
          "free_disk": remote_info["vg_free"],
13297
          "total_cpus": remote_info["cpu_total"],
13298
          "i_pri_memory": i_p_mem,
13299
          "i_pri_up_memory": i_p_up_mem,
13300
          }
13301
        pnr_dyn.update(node_results[nname])
13302
        node_results[nname] = pnr_dyn
13303

    
13304
    return node_results
13305

    
13306
  @staticmethod
13307
  def _ComputeInstanceData(cluster_info, i_list):
13308
    """Compute global instance data.
13309

13310
    """
13311
    instance_data = {}
13312
    for iinfo, beinfo in i_list:
13313
      nic_data = []
13314
      for nic in iinfo.nics:
13315
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13316
        nic_dict = {
13317
          "mac": nic.mac,
13318
          "ip": nic.ip,
13319
          "mode": filled_params[constants.NIC_MODE],
13320
          "link": filled_params[constants.NIC_LINK],
13321
          }
13322
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13323
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13324
        nic_data.append(nic_dict)
13325
      pir = {
13326
        "tags": list(iinfo.GetTags()),
13327
        "admin_up": iinfo.admin_up,
13328
        "vcpus": beinfo[constants.BE_VCPUS],
13329
        "memory": beinfo[constants.BE_MEMORY],
13330
        "os": iinfo.os,
13331
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13332
        "nics": nic_data,
13333
        "disks": [{constants.IDISK_SIZE: dsk.size,
13334
                   constants.IDISK_MODE: dsk.mode}
13335
                  for dsk in iinfo.disks],
13336
        "disk_template": iinfo.disk_template,
13337
        "hypervisor": iinfo.hypervisor,
13338
        }
13339
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13340
                                                 pir["disks"])
13341
      instance_data[iinfo.name] = pir
13342

    
13343
    return instance_data
13344

    
13345
  def _AddNewInstance(self):
13346
    """Add new instance data to allocator structure.
13347

13348
    This in combination with _ComputeClusterData will create the
13349
    correct structure needed as input for the allocator.
13350

13351
    The checks for the completeness of the opcode must have already been
13352
    done.
13353

13354
    """
13355
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13356

    
13357
    if self.disk_template in constants.DTS_INT_MIRROR:
13358
      self.required_nodes = 2
13359
    else:
13360
      self.required_nodes = 1
13361

    
13362
    request = {
13363
      "name": self.name,
13364
      "disk_template": self.disk_template,
13365
      "tags": self.tags,
13366
      "os": self.os,
13367
      "vcpus": self.vcpus,
13368
      "memory": self.memory,
13369
      "disks": self.disks,
13370
      "disk_space_total": disk_space,
13371
      "nics": self.nics,
13372
      "required_nodes": self.required_nodes,
13373
      "hypervisor": self.hypervisor,
13374
      }
13375

    
13376
    return request
13377

    
13378
  def _AddRelocateInstance(self):
13379
    """Add relocate instance data to allocator structure.
13380

13381
    This in combination with _ComputeClusterData will create the
13382
    correct structure needed as input for the allocator.
13383

13384
    The checks for the completeness of the opcode must have already been
13385
    done.
13386

13387
    """
13388
    instance = self.cfg.GetInstanceInfo(self.name)
13389
    if instance is None:
13390
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13391
                                   " IAllocator" % self.name)
13392

    
13393
    if instance.disk_template not in constants.DTS_MIRRORED:
13394
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13395
                                 errors.ECODE_INVAL)
13396

    
13397
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13398
        len(instance.secondary_nodes) != 1:
13399
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13400
                                 errors.ECODE_STATE)
13401

    
13402
    self.required_nodes = 1
13403
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13404
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13405

    
13406
    request = {
13407
      "name": self.name,
13408
      "disk_space_total": disk_space,
13409
      "required_nodes": self.required_nodes,
13410
      "relocate_from": self.relocate_from,
13411
      }
13412
    return request
13413

    
13414
  def _AddNodeEvacuate(self):
13415
    """Get data for node-evacuate requests.
13416

13417
    """
13418
    return {
13419
      "instances": self.instances,
13420
      "evac_mode": self.evac_mode,
13421
      }
13422

    
13423
  def _AddChangeGroup(self):
13424
    """Get data for node-evacuate requests.
13425

13426
    """
13427
    return {
13428
      "instances": self.instances,
13429
      "target_groups": self.target_groups,
13430
      }
13431

    
13432
  def _BuildInputData(self, fn, keydata):
13433
    """Build input data structures.
13434

13435
    """
13436
    self._ComputeClusterData()
13437

    
13438
    request = fn()
13439
    request["type"] = self.mode
13440
    for keyname, keytype in keydata:
13441
      if keyname not in request:
13442
        raise errors.ProgrammerError("Request parameter %s is missing" %
13443
                                     keyname)
13444
      val = request[keyname]
13445
      if not keytype(val):
13446
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13447
                                     " validation, value %s, expected"
13448
                                     " type %s" % (keyname, val, keytype))
13449
    self.in_data["request"] = request
13450

    
13451
    self.in_text = serializer.Dump(self.in_data)
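    # At this point self.in_data holds the cluster-level keys filled in by
    # _ComputeClusterData ("version", "cluster_name", "cluster_tags",
    # "enabled_hypervisors", "nodegroups", "nodes", "instances") plus the
    # mode-specific "request" dict, serialized above into self.in_text.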
13452

    
13453
  _STRING_LIST = ht.TListOf(ht.TString)
13454
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13455
     # pylint: disable=E1101
13456
     # Class '...' has no 'OP_ID' member
13457
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13458
                          opcodes.OpInstanceMigrate.OP_ID,
13459
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13460
     })))
13461

    
13462
  _NEVAC_MOVED = \
13463
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13464
                       ht.TItems([ht.TNonEmptyString,
13465
                                  ht.TNonEmptyString,
13466
                                  ht.TListOf(ht.TNonEmptyString),
13467
                                 ])))
13468
  _NEVAC_FAILED = \
13469
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13470
                       ht.TItems([ht.TNonEmptyString,
13471
                                  ht.TMaybeString,
13472
                                 ])))
13473
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13474
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
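  # A conforming node-evacuate result would look roughly like (hypothetical
  # values):
  #   ([("inst1", "target-group-uuid", ["node2"])],   # moved
  #    [("inst2", "could not compute solution")],     # failed
  #    [[{"OP_ID": "OP_INSTANCE_MIGRATE"}]])          # jobs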
13475

    
13476
  _MODE_DATA = {
13477
    constants.IALLOCATOR_MODE_ALLOC:
13478
      (_AddNewInstance,
13479
       [
13480
        ("name", ht.TString),
13481
        ("memory", ht.TInt),
13482
        ("disks", ht.TListOf(ht.TDict)),
13483
        ("disk_template", ht.TString),
13484
        ("os", ht.TString),
13485
        ("tags", _STRING_LIST),
13486
        ("nics", ht.TListOf(ht.TDict)),
13487
        ("vcpus", ht.TInt),
13488
        ("hypervisor", ht.TString),
13489
        ], ht.TList),
13490
    constants.IALLOCATOR_MODE_RELOC:
13491
      (_AddRelocateInstance,
13492
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13493
       ht.TList),
13494
     constants.IALLOCATOR_MODE_NODE_EVAC:
13495
      (_AddNodeEvacuate, [
13496
        ("instances", _STRING_LIST),
13497
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13498
        ], _NEVAC_RESULT),
13499
     constants.IALLOCATOR_MODE_CHG_GROUP:
13500
      (_AddChangeGroup, [
13501
        ("instances", _STRING_LIST),
13502
        ("target_groups", _STRING_LIST),
13503
        ], _NEVAC_RESULT),
13504
    }
13505

    
13506
  def Run(self, name, validate=True, call_fn=None):
13507
    """Run an instance allocator and return the results.
13508

13509
    """
13510
    if call_fn is None:
13511
      call_fn = self.rpc.call_iallocator_runner
13512

    
13513
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13514
    result.Raise("Failure while running the iallocator script")
13515

    
13516
    self.out_text = result.payload
13517
    if validate:
13518
      self._ValidateResult()
13519

    
13520
  def _ValidateResult(self):
13521
    """Process the allocator results.
13522

13523
    This will process and if successful save the result in
13524
    self.out_data and the other parameters.
13525

13526
    """
13527
    try:
13528
      rdict = serializer.Load(self.out_text)
13529
    except Exception, err:
13530
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13531

    
13532
    if not isinstance(rdict, dict):
13533
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
13534

    
13535
    # TODO: remove backwards compatibility in later versions
13536
    if "nodes" in rdict and "result" not in rdict:
13537
      rdict["result"] = rdict["nodes"]
13538
      del rdict["nodes"]
13539

    
13540
    for key in "success", "info", "result":
13541
      if key not in rdict:
13542
        raise errors.OpExecError("Can't parse iallocator results:"
13543
                                 " missing key '%s'" % key)
13544
      setattr(self, key, rdict[key])
13545

    
13546
    if not self._result_check(self.result):
13547
      raise errors.OpExecError("Iallocator returned invalid result,"
13548
                               " expected %s, got %s" %
13549
                               (self._result_check, self.result),
13550
                               errors.ECODE_INVAL)
13551

    
13552
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
13553
      assert self.relocate_from is not None
13554
      assert self.required_nodes == 1
13555

    
13556
      node2group = dict((name, ndata["group"])
13557
                        for (name, ndata) in self.in_data["nodes"].items())
13558

    
13559
      fn = compat.partial(self._NodesToGroups, node2group,
13560
                          self.in_data["nodegroups"])
13561

    
13562
      instance = self.cfg.GetInstanceInfo(self.name)
13563
      request_groups = fn(self.relocate_from + [instance.primary_node])
13564
      result_groups = fn(rdict["result"] + [instance.primary_node])
13565

    
13566
      if self.success and not set(result_groups).issubset(request_groups):
13567
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13568
                                 " differ from original groups (%s)" %
13569
                                 (utils.CommaJoin(result_groups),
13570
                                  utils.CommaJoin(request_groups)))
13571

    
13572
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13573
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13574

    
13575
    self.out_data = rdict
13576

    
13577
  @staticmethod
13578
  def _NodesToGroups(node2group, groups, nodes):
13579
    """Returns a list of unique group names for a list of nodes.
13580

13581
    @type node2group: dict
13582
    @param node2group: Map from node name to group UUID
13583
    @type groups: dict
13584
    @param groups: Group information
13585
    @type nodes: list
13586
    @param nodes: Node names
13587

13588
    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
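      # Each entry in self.op.disks must be a dict giving at least the size
      # (an integer, in MiB) and the access mode, e.g. (illustrative values
      # only):
      #   {constants.IDISK_SIZE: 1024,
      #    constants.IDISK_MODE: constants.DISK_RDWR}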
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
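# The assert above makes a mismatch between the registered implementations
# and constants.QR_VIA_OP fail at import time instead of surfacing later as
# an unknown-resource error at query time.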


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
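
# Typical use (a sketch; the surrounding query machinery lives elsewhere in
# this module): _GetQueryImplementation(constants.QR_NODE) yields the
# _NodeQuery class, which callers then instantiate to build and run the query.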