
root / lib / cmdlib.py @ 57c7bc57

#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62
from ganeti import rpc
63

    
64
import ganeti.masterd.instance # pylint: disable=W0611
65

    
66

    
67
#: Size of DRBD meta block device
68
DRBD_META_SIZE = 128
69

    
70

    
71
class ResultWithJobs:
72
  """Data container for LU results with jobs.
73

74
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
75
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
76
  contained in the C{jobs} attribute and include the job IDs in the opcode
77
  result.
78

79
  """
80
  def __init__(self, jobs, **kwargs):
81
    """Initializes this class.
82

83
    Additional return values can be specified as keyword arguments.
84

85
    @type jobs: list of lists of L{opcode.OpCode}
86
    @param jobs: A list of lists of opcode objects
87

88
    """
89
    self.jobs = jobs
90
    self.other = kwargs
91

    
92

    
93
class LogicalUnit(object):
94
  """Logical Unit base class.
95

96
  Subclasses must follow these rules:
97
    - implement ExpandNames
98
    - implement CheckPrereq (except when tasklets are used)
99
    - implement Exec (except when tasklets are used)
100
    - implement BuildHooksEnv
101
    - implement BuildHooksNodes
102
    - redefine HPATH and HTYPE
103
    - optionally redefine their run requirements:
104
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
105

106
  Note that all commands require root permissions.
107

108
  @ivar dry_run_result: the value (if any) that will be returned to the caller
109
      in dry-run mode (signalled by opcode dry_run parameter)
110

111
  """
112
  HPATH = None
113
  HTYPE = None
114
  REQ_BGL = True
115

    
116
  def __init__(self, processor, op, context, rpc_runner):
117
    """Constructor for LogicalUnit.
118

119
    This needs to be overridden in derived classes in order to check op
120
    validity.
121

122
    """
123
    self.proc = processor
124
    self.op = op
125
    self.cfg = context.cfg
126
    self.glm = context.glm
127
    # readability alias
128
    self.owned_locks = context.glm.list_owned
129
    self.context = context
130
    self.rpc = rpc_runner
131
    # Dicts used to declare locking needs to mcpu
132
    self.needed_locks = None
133
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
134
    self.add_locks = {}
135
    self.remove_locks = {}
136
    # Used to force good behavior when calling helper functions
137
    self.recalculate_locks = {}
138
    # logging
139
    self.Log = processor.Log # pylint: disable=C0103
140
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
141
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
142
    self.LogStep = processor.LogStep # pylint: disable=C0103
143
    # support for dry-run
144
    self.dry_run_result = None
145
    # support for generic debug attribute
146
    if (not hasattr(self.op, "debug_level") or
147
        not isinstance(self.op.debug_level, int)):
148
      self.op.debug_level = 0
149

    
150
    # Tasklets
151
    self.tasklets = None
152

    
153
    # Validate opcode parameters and set defaults
154
    self.op.Validate(True)
155

    
156
    self.CheckArguments()
157

    
158
  def CheckArguments(self):
159
    """Check syntactic validity for the opcode arguments.
160

161
    This method is for doing a simple syntactic check and ensure
162
    validity of opcode parameters, without any cluster-related
163
    checks. While the same can be accomplished in ExpandNames and/or
164
    CheckPrereq, doing these separate is better because:
165

166
      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.
172

173
    """
174
    pass
175

    
176
  def ExpandNames(self):
177
    """Expand names for this LU.
178

179
    This method is called before starting to execute the opcode, and it should
180
    update all the parameters of the opcode to their canonical form (e.g. a
181
    short node name must be fully expanded after this method has successfully
182
    completed). This way locking, hooks, logging, etc. can work correctly.
183

184
    LUs which implement this method must also populate the self.needed_locks
185
    member, as a dict with lock levels as keys, and a list of needed lock names
186
    as values. Rules:
187

188
      - use an empty dict if you don't need any lock
189
      - if you don't need any lock at a particular level omit that level
190
      - don't put anything for the BGL level
191
      - if you want all locks at a level use locking.ALL_SET as a value
192

193
    If you need to share locks (rather than acquire them exclusively) at one
194
    level you can modify self.share_locks, setting a true value (usually 1) for
195
    that level. By default locks are not shared.
196

197
    This function can also define a list of tasklets, which then will be
198
    executed in order instead of the usual LU-level CheckPrereq and Exec
199
    functions, if those are not defined by the LU.
200

201
    Examples::
202

203
      # Acquire all nodes and one instance
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: locking.ALL_SET,
206
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
207
      }
208
      # Acquire just two nodes
209
      self.needed_locks = {
210
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
211
      }
212
      # Acquire no locks
213
      self.needed_locks = {} # No, you can't leave it to the default value None
214

215
    """
216
    # The implementation of this method is mandatory only if the new LU is
217
    # concurrent, so that old LUs don't need to be changed all at the same
218
    # time.
219
    if self.REQ_BGL:
220
      self.needed_locks = {} # Exclusive LUs don't need locks.
221
    else:
222
      raise NotImplementedError
223

    
224
  def DeclareLocks(self, level):
225
    """Declare LU locking needs for a level
226

227
    While most LUs can just declare their locking needs at ExpandNames time,
228
    sometimes there's the need to calculate some locks after having acquired
229
    the ones before. This function is called just before acquiring locks at a
230
    particular level, but after acquiring the ones at lower levels, and permits
231
    such calculations. It can be used to modify self.needed_locks, and by
232
    default it does nothing.
233

234
    This function is only called if you have something already set in
235
    self.needed_locks for the level.
236

237
    @param level: Locking level which is going to be locked
238
    @type level: member of ganeti.locking.LEVELS
239

240
    """
241

    
242
  def CheckPrereq(self):
243
    """Check prerequisites for this LU.
244

245
    This method should check that the prerequisites for the execution
246
    of this LU are fulfilled. It can do internode communication, but
247
    it should be idempotent - no cluster or system changes are
248
    allowed.
249

250
    The method should raise errors.OpPrereqError in case something is
251
    not fulfilled. Its return value is ignored.
252

253
    This method should also update all the parameters of the opcode to
254
    their canonical form if it hasn't been done by ExpandNames before.
255

256
    """
257
    if self.tasklets is not None:
258
      for (idx, tl) in enumerate(self.tasklets):
259
        logging.debug("Checking prerequisites for tasklet %s/%s",
260
                      idx + 1, len(self.tasklets))
261
        tl.CheckPrereq()
262
    else:
263
      pass
264

    
265
  def Exec(self, feedback_fn):
266
    """Execute the LU.
267

268
    This method should implement the actual work. It should raise
269
    errors.OpExecError for failures that are somewhat dealt with in
270
    code, or expected.
271

272
    """
273
    if self.tasklets is not None:
274
      for (idx, tl) in enumerate(self.tasklets):
275
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
276
        tl.Exec(feedback_fn)
277
    else:
278
      raise NotImplementedError
279

    
280
  def BuildHooksEnv(self):
281
    """Build hooks environment for this LU.
282

283
    @rtype: dict
284
    @return: Dictionary containing the environment that will be used for
285
      running the hooks for this LU. The keys of the dict must not be prefixed
286
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
287
      will extend the environment with additional variables. If no environment
288
      should be defined, an empty dictionary should be returned (not C{None}).
289
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
290
      will not be called.
291

292
    """
293
    raise NotImplementedError
294

    
295
  def BuildHooksNodes(self):
296
    """Build list of nodes to run LU's hooks.
297

298
    @rtype: tuple; (list, list)
299
    @return: Tuple containing a list of node names on which the hook
300
      should run before the execution and a list of node names on which the
301
      hook should run after the execution. No nodes should be returned as an
302
      empty list (and not None).
303
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
304
      will not be called.
305

306
    """
307
    raise NotImplementedError
308

    
309
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
310
    """Notify the LU about the results of its hooks.
311

312
    This method is called every time a hooks phase is executed, and notifies
313
    the Logical Unit about the hooks' result. The LU can then use it to alter
314
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged, but any LU can override it if
    it wants to make use of the local cluster hook-scripts.
317

318
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
319
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
320
    @param hook_results: the results of the multi-node hooks rpc call
321
    @param feedback_fn: function used to send feedback back to the caller
322
    @param lu_result: the previous Exec result this LU had, or None
323
        in the PRE phase
324
    @return: the new Exec result, based on the previous result
325
        and hook results
326

327
    """
328
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" warnings
330
    # pylint: disable=W0613,R0201
331
    return lu_result
332

    
333
  def _ExpandAndLockInstance(self):
334
    """Helper function to expand and lock an instance.
335

336
    Many LUs that work on an instance take its name in self.op.instance_name
337
    and need to expand it and then declare the expanded name for locking. This
338
    function does it, and then updates self.op.instance_name to the expanded
339
    name. It also initializes needed_locks as a dict, if this hasn't been done
340
    before.
341

342
    """
343
    if self.needed_locks is None:
344
      self.needed_locks = {}
345
    else:
346
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
347
        "_ExpandAndLockInstance called with instance-level locks set"
348
    self.op.instance_name = _ExpandInstanceName(self.cfg,
349
                                                self.op.instance_name)
350
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
351

    
352
  def _LockInstancesNodes(self, primary_only=False,
353
                          level=locking.LEVEL_NODE):
354
    """Helper function to declare instances' nodes for locking.
355

356
    This function should be called after locking one or more instances to lock
357
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
358
    with all primary or secondary nodes for instances already locked and
359
    present in self.needed_locks[locking.LEVEL_INSTANCE].
360

361
    It should be called from DeclareLocks, and for safety only works if
362
    self.recalculate_locks[locking.LEVEL_NODE] is set.
363

364
    In the future it may grow parameters to just lock some instance's nodes, or
365
    to just lock primaries or secondary nodes, if needed.
366

367
    It should be called in DeclareLocks in a way similar to::
368

369
      if level == locking.LEVEL_NODE:
370
        self._LockInstancesNodes()
371

372
    @type primary_only: boolean
373
    @param primary_only: only lock primary nodes of locked instances
374
    @param level: Which lock level to use for locking nodes
375

376
    """
377
    assert level in self.recalculate_locks, \
378
      "_LockInstancesNodes helper function called with no nodes to recalculate"
379

    
380
    # TODO: check if we've really been called with the instance locks held
381

    
382
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
383
    # future we might want to have different behaviors depending on the value
384
    # of self.recalculate_locks[locking.LEVEL_NODE]
385
    wanted_nodes = []
386
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
387
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
388
      wanted_nodes.append(instance.primary_node)
389
      if not primary_only:
390
        wanted_nodes.extend(instance.secondary_nodes)
391

    
392
    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
393
      self.needed_locks[level] = wanted_nodes
394
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
395
      self.needed_locks[level].extend(wanted_nodes)
396
    else:
397
      raise errors.ProgrammerError("Unknown recalculation mode")
398

    
399
    del self.recalculate_locks[level]
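
# A minimal LU skeleton following the rules above (illustrative only; the
# hook path and the corresponding opcode are hypothetical):
#
#   class LUExampleNoop(LogicalUnit):
#     HPATH = "example-noop"
#     HTYPE = constants.HTYPE_CLUSTER
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self.needed_locks = {}
#
#     def BuildHooksEnv(self):
#       return {"OP_TARGET": self.cfg.GetClusterName()}
#
#     def BuildHooksNodes(self):
#       return ([], [self.cfg.GetMasterNode()])
#
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       return True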
400

    
401

    
402
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
403
  """Simple LU which runs no hooks.
404

405
  This LU is intended as a parent for other LogicalUnits which will
406
  run no hooks, in order to reduce duplicate code.
407

408
  """
409
  HPATH = None
410
  HTYPE = None
411

    
412
  def BuildHooksEnv(self):
413
    """Empty BuildHooksEnv for NoHooksLu.
414

415
    This just raises an error.
416

417
    """
418
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
419

    
420
  def BuildHooksNodes(self):
421
    """Empty BuildHooksNodes for NoHooksLU.
422

423
    """
424
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
425

    
426

    
427
class Tasklet:
428
  """Tasklet base class.
429

430
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
431
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
432
  tasklets know nothing about locks.
433

434
  Subclasses must follow these rules:
435
    - Implement CheckPrereq
436
    - Implement Exec
437

438
  """
439
  def __init__(self, lu):
440
    self.lu = lu
441

    
442
    # Shortcuts
443
    self.cfg = lu.cfg
444
    self.rpc = lu.rpc
445

    
446
  def CheckPrereq(self):
447
    """Check prerequisites for this tasklets.
448

449
    This method should check whether the prerequisites for the execution of
450
    this tasklet are fulfilled. It can do internode communication, but it
451
    should be idempotent - no cluster or system changes are allowed.
452

453
    The method should raise errors.OpPrereqError in case something is not
454
    fulfilled. Its return value is ignored.
455

456
    This method should also update all parameters to their canonical form if it
457
    hasn't been done before.
458

459
    """
460
    pass
461

    
462
  def Exec(self, feedback_fn):
463
    """Execute the tasklet.
464

465
    This method should implement the actual work. It should raise
466
    errors.OpExecError for failures that are somewhat dealt with in code, or
467
    expected.
468

469
    """
470
    raise NotImplementedError
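
# A minimal tasklet sketch (hypothetical class): the owning LU keeps doing
# the locking and simply wires the tasklets in from ExpandNames:
#
#   class _ExampleTasklet(Tasklet):
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("example tasklet did nothing")
#
# and in the LU:
#
#   def ExpandNames(self):
#     self.needed_locks = {}
#     self.tasklets = [_ExampleTasklet(self)]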
471

    
472

    
473
class _QueryBase:
474
  """Base for query utility classes.
475

476
  """
477
  #: Attribute holding field definitions
478
  FIELDS = None
479

    
480
  def __init__(self, qfilter, fields, use_locking):
481
    """Initializes this class.
482

483
    """
484
    self.use_locking = use_locking
485

    
486
    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
487
                             namefield="name")
488
    self.requested_data = self.query.RequestedData()
489
    self.names = self.query.RequestedNames()
490

    
491
    # Sort only if no names were requested
492
    self.sort_by_name = not self.names
493

    
494
    self.do_locking = None
495
    self.wanted = None
496

    
497
  def _GetNames(self, lu, all_names, lock_level):
498
    """Helper function to determine names asked for in the query.
499

500
    """
501
    if self.do_locking:
502
      names = lu.owned_locks(lock_level)
503
    else:
504
      names = all_names
505

    
506
    if self.wanted == locking.ALL_SET:
507
      assert not self.names
508
      # caller didn't specify names, so ordering is not important
509
      return utils.NiceSort(names)
510

    
511
    # caller specified names and we must keep the same order
512
    assert self.names
513
    assert not self.do_locking or lu.glm.is_owned(lock_level)
514

    
515
    missing = set(self.wanted).difference(names)
516
    if missing:
517
      raise errors.OpExecError("Some items were removed before retrieving"
518
                               " their data: %s" % missing)
519

    
520
    # Return expanded names
521
    return self.wanted
522

    
523
  def ExpandNames(self, lu):
524
    """Expand names for this query.
525

526
    See L{LogicalUnit.ExpandNames}.
527

528
    """
529
    raise NotImplementedError()
530

    
531
  def DeclareLocks(self, lu, level):
532
    """Declare locks for this query.
533

534
    See L{LogicalUnit.DeclareLocks}.
535

536
    """
537
    raise NotImplementedError()
538

    
539
  def _GetQueryData(self, lu):
540
    """Collects all data for this query.
541

542
    @return: Query data object
543

544
    """
545
    raise NotImplementedError()
546

    
547
  def NewStyleQuery(self, lu):
548
    """Collect data and execute query.
549

550
    """
551
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
552
                                  sort_by_name=self.sort_by_name)
553

    
554
  def OldStyleQuery(self, lu):
555
    """Collect data and execute query.
556

557
    """
558
    return self.query.OldStyleQuery(self._GetQueryData(lu),
559
                                    sort_by_name=self.sort_by_name)
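
# A rough sketch of a concrete query class (the field table and the data
# gathering are assumptions, shown only to illustrate the hook points):
#
#   class _ExampleQuery(_QueryBase):
#     FIELDS = query.NODE_FIELDS  # reuse an existing field definition table
#
#     def ExpandNames(self, lu):
#       lu.needed_locks = {}
#       self.wanted = self.names
#       self.do_locking = self.use_locking
#
#     def DeclareLocks(self, lu, level):
#       pass
#
#     def _GetQueryData(self, lu):
#       # build and return the data object expected by self.query
#       ...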
560

    
561

    
562
def _ShareAll():
563
  """Returns a dict declaring all lock levels shared.
564

565
  """
566
  return dict.fromkeys(locking.LEVELS, 1)
567

    
568

    
569
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
570
  """Checks if the owned node groups are still correct for an instance.
571

572
  @type cfg: L{config.ConfigWriter}
573
  @param cfg: The cluster configuration
574
  @type instance_name: string
575
  @param instance_name: Instance name
576
  @type owned_groups: set or frozenset
577
  @param owned_groups: List of currently owned node groups
578

579
  """
580
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
581

    
582
  if not owned_groups.issuperset(inst_groups):
583
    raise errors.OpPrereqError("Instance %s's node groups changed since"
584
                               " locks were acquired, current groups are"
585
                               " are '%s', owning groups '%s'; retry the"
586
                               " operation" %
587
                               (instance_name,
588
                                utils.CommaJoin(inst_groups),
589
                                utils.CommaJoin(owned_groups)),
590
                               errors.ECODE_STATE)
591

    
592
  return inst_groups
593

    
594

    
595
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
596
  """Checks if the instances in a node group are still correct.
597

598
  @type cfg: L{config.ConfigWriter}
599
  @param cfg: The cluster configuration
600
  @type group_uuid: string
601
  @param group_uuid: Node group UUID
602
  @type owned_instances: set or frozenset
603
  @param owned_instances: List of currently owned instances
604

605
  """
606
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
607
  if owned_instances != wanted_instances:
608
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
609
                               " locks were acquired, wanted '%s', have '%s';"
610
                               " retry the operation" %
611
                               (group_uuid,
612
                                utils.CommaJoin(wanted_instances),
613
                                utils.CommaJoin(owned_instances)),
614
                               errors.ECODE_STATE)
615

    
616
  return wanted_instances
617

    
618

    
619
def _SupportsOob(cfg, node):
620
  """Tells if node supports OOB.
621

622
  @type cfg: L{config.ConfigWriter}
623
  @param cfg: The cluster configuration
624
  @type node: L{objects.Node}
625
  @param node: The node
626
  @return: The OOB script if supported or an empty string otherwise
627

628
  """
629
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
630

    
631

    
632
def _GetWantedNodes(lu, nodes):
633
  """Returns list of checked and expanded node names.
634

635
  @type lu: L{LogicalUnit}
636
  @param lu: the logical unit on whose behalf we execute
637
  @type nodes: list
638
  @param nodes: list of node names or None for all nodes
639
  @rtype: list
640
  @return: the list of nodes, sorted
641
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
642

643
  """
644
  if nodes:
645
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
646

    
647
  return utils.NiceSort(lu.cfg.GetNodeList())
648

    
649

    
650
def _GetWantedInstances(lu, instances):
651
  """Returns list of checked and expanded instance names.
652

653
  @type lu: L{LogicalUnit}
654
  @param lu: the logical unit on whose behalf we execute
655
  @type instances: list
656
  @param instances: list of instance names or None for all instances
657
  @rtype: list
658
  @return: the list of instances, sorted
659
  @raise errors.OpPrereqError: if the instances parameter is wrong type
660
  @raise errors.OpPrereqError: if any of the passed instances is not found
661

662
  """
663
  if instances:
664
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
665
  else:
666
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
667
  return wanted
668

    
669

    
670
def _GetUpdatedParams(old_params, update_dict,
671
                      use_default=True, use_none=False):
672
  """Return the new version of a parameter dictionary.
673

674
  @type old_params: dict
675
  @param old_params: old parameters
676
  @type update_dict: dict
677
  @param update_dict: dict containing new parameter values, or
678
      constants.VALUE_DEFAULT to reset the parameter to its default
679
      value
680
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
686
  @rtype: dict
687
  @return: the new parameter dictionary
688

689
  """
690
  params_copy = copy.deepcopy(old_params)
691
  for key, val in update_dict.iteritems():
692
    if ((use_default and val == constants.VALUE_DEFAULT) or
693
        (use_none and val is None)):
694
      try:
695
        del params_copy[key]
696
      except KeyError:
697
        pass
698
    else:
699
      params_copy[key] = val
700
  return params_copy
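
# A small worked example of the merge semantics above:
#
#   _GetUpdatedParams({"a": 1, "b": 2}, {"a": constants.VALUE_DEFAULT, "c": 3})
#   --> {"b": 2, "c": 3}   ("a" is reset by removing it from the dict)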
701

    
702

    
703
def _ReleaseLocks(lu, level, names=None, keep=None):
704
  """Releases locks owned by an LU.
705

706
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @param level: Lock level
708
  @type names: list or None
709
  @param names: Names of locks to release
710
  @type keep: list or None
711
  @param keep: Names of locks to retain
712

713
  """
714
  assert not (keep is not None and names is not None), \
715
         "Only one of the 'names' and the 'keep' parameters can be given"
716

    
717
  if names is not None:
718
    should_release = names.__contains__
719
  elif keep:
720
    should_release = lambda name: name not in keep
721
  else:
722
    should_release = None
723

    
724
  if should_release:
725
    retain = []
726
    release = []
727

    
728
    # Determine which locks to release
729
    for name in lu.owned_locks(level):
730
      if should_release(name):
731
        release.append(name)
732
      else:
733
        retain.append(name)
734

    
735
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
736

    
737
    # Release just some locks
738
    lu.glm.release(level, names=release)
739

    
740
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
741
  else:
742
    # Release everything
743
    lu.glm.release(level)
744

    
745
    assert not lu.glm.is_owned(level), "No locks should be owned"
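
# Typical call patterns (sketch): release every lock at a level, or keep only
# the locks named in "keep":
#
#   _ReleaseLocks(self, locking.LEVEL_NODE)
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])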
746

    
747

    
748
def _MapInstanceDisksToNodes(instances):
749
  """Creates a map from (node, volume) to instance name.
750

751
  @type instances: list of L{objects.Instance}
752
  @rtype: dict; tuple of (node name, volume name) as key, instance name
      as value
753

754
  """
755
  return dict(((node, vol), inst.name)
756
              for inst in instances
757
              for (node, vols) in inst.MapLVsByNode().items()
758
              for vol in vols)
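
# Shape of the returned mapping (node and volume names are illustrative):
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}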
759

    
760

    
761
def _RunPostHook(lu, node_name):
762
  """Runs the post-hook for an opcode on a single node.
763

764
  """
765
  hm = lu.proc.BuildHooksManager(lu)
766
  try:
767
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
768
  except:
769
    # pylint: disable=W0702
770
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
771

    
772

    
773
def _CheckOutputFields(static, dynamic, selected):
774
  """Checks whether all selected fields are valid.
775

776
  @type static: L{utils.FieldSet}
777
  @param static: static fields set
778
  @type dynamic: L{utils.FieldSet}
779
  @param dynamic: dynamic fields set
780

781
  """
782
  f = utils.FieldSet()
783
  f.Extend(static)
784
  f.Extend(dynamic)
785

    
786
  delta = f.NonMatching(selected)
787
  if delta:
788
    raise errors.OpPrereqError("Unknown output fields selected: %s"
789
                               % ",".join(delta), errors.ECODE_INVAL)
790

    
791

    
792
def _CheckGlobalHvParams(params):
793
  """Validates that given hypervisor params are not global ones.
794

795
  This will ensure that instances don't get customised versions of
796
  global params.
797

798
  """
799
  used_globals = constants.HVC_GLOBALS.intersection(params)
800
  if used_globals:
801
    msg = ("The following hypervisor parameters are global and cannot"
802
           " be customized at instance level, please modify them at"
803
           " cluster level: %s" % utils.CommaJoin(used_globals))
804
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
805

    
806

    
807
def _CheckNodeOnline(lu, node, msg=None):
808
  """Ensure that a given node is online.
809

810
  @param lu: the LU on behalf of which we make the check
811
  @param node: the node to check
812
  @param msg: if passed, should be a message to replace the default one
813
  @raise errors.OpPrereqError: if the node is offline
814

815
  """
816
  if msg is None:
817
    msg = "Can't use offline node"
818
  if lu.cfg.GetNodeInfo(node).offline:
819
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
820

    
821

    
822
def _CheckNodeNotDrained(lu, node):
823
  """Ensure that a given node is not drained.
824

825
  @param lu: the LU on behalf of which we make the check
826
  @param node: the node to check
827
  @raise errors.OpPrereqError: if the node is drained
828

829
  """
830
  if lu.cfg.GetNodeInfo(node).drained:
831
    raise errors.OpPrereqError("Can't use drained node %s" % node,
832
                               errors.ECODE_STATE)
833

    
834

    
835
def _CheckNodeVmCapable(lu, node):
836
  """Ensure that a given node is vm capable.
837

838
  @param lu: the LU on behalf of which we make the check
839
  @param node: the node to check
840
  @raise errors.OpPrereqError: if the node is not vm capable
841

842
  """
843
  if not lu.cfg.GetNodeInfo(node).vm_capable:
844
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
845
                               errors.ECODE_STATE)
846

    
847

    
848
def _CheckNodeHasOS(lu, node, os_name, force_variant):
849
  """Ensure that a node supports a given OS.
850

851
  @param lu: the LU on behalf of which we make the check
852
  @param node: the node to check
853
  @param os_name: the OS to query about
854
  @param force_variant: whether to ignore variant errors
855
  @raise errors.OpPrereqError: if the node does not support the OS
856

857
  """
858
  result = lu.rpc.call_os_get(node, os_name)
859
  result.Raise("OS '%s' not in supported OS list for node %s" %
860
               (os_name, node),
861
               prereq=True, ecode=errors.ECODE_INVAL)
862
  if not force_variant:
863
    _CheckOSVariant(result.payload, os_name)
864

    
865

    
866
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
867
  """Ensure that a node has the given secondary ip.
868

869
  @type lu: L{LogicalUnit}
870
  @param lu: the LU on behalf of which we make the check
871
  @type node: string
872
  @param node: the node to check
873
  @type secondary_ip: string
874
  @param secondary_ip: the ip to check
875
  @type prereq: boolean
876
  @param prereq: whether to throw a prerequisite or an execute error
877
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
878
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
879

880
  """
881
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
882
  result.Raise("Failure checking secondary ip on node %s" % node,
883
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
884
  if not result.payload:
885
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
886
           " please fix and re-run this command" % secondary_ip)
887
    if prereq:
888
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
889
    else:
890
      raise errors.OpExecError(msg)
891

    
892

    
893
def _GetClusterDomainSecret():
894
  """Reads the cluster domain secret.
895

896
  """
897
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
898
                               strict=True)
899

    
900

    
901
def _CheckInstanceDown(lu, instance, reason):
902
  """Ensure that an instance is not running."""
903
  if instance.admin_up:
904
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
905
                               (instance.name, reason), errors.ECODE_STATE)
906

    
907
  pnode = instance.primary_node
908
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
909
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
910
              prereq=True, ecode=errors.ECODE_ENVIRON)
911

    
912
  if instance.name in ins_l.payload:
913
    raise errors.OpPrereqError("Instance %s is running, %s" %
914
                               (instance.name, reason), errors.ECODE_STATE)
915

    
916

    
917
def _ExpandItemName(fn, name, kind):
918
  """Expand an item name.
919

920
  @param fn: the function to use for expansion
921
  @param name: requested item name
922
  @param kind: text description ('Node' or 'Instance')
923
  @return: the resolved (full) name
924
  @raise errors.OpPrereqError: if the item is not found
925

926
  """
927
  full_name = fn(name)
928
  if full_name is None:
929
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
930
                               errors.ECODE_NOENT)
931
  return full_name
932

    
933

    
934
def _ExpandNodeName(cfg, name):
935
  """Wrapper over L{_ExpandItemName} for nodes."""
936
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
937

    
938

    
939
def _ExpandInstanceName(cfg, name):
940
  """Wrapper over L{_ExpandItemName} for instance."""
941
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
942

    
943

    
944
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
945
                          memory, vcpus, nics, disk_template, disks,
946
                          bep, hvp, hypervisor_name, tags):
947
  """Builds instance related env variables for hooks
948

949
  This builds the hook environment from individual variables.
950

951
  @type name: string
952
  @param name: the name of the instance
953
  @type primary_node: string
954
  @param primary_node: the name of the instance's primary node
955
  @type secondary_nodes: list
956
  @param secondary_nodes: list of secondary nodes as strings
957
  @type os_type: string
958
  @param os_type: the name of the instance's OS
959
  @type status: boolean
960
  @param status: the should_run status of the instance
961
  @type memory: string
962
  @param memory: the memory size of the instance
963
  @type vcpus: string
964
  @param vcpus: the count of VCPUs the instance has
965
  @type nics: list
966
  @param nics: list of tuples (ip, mac, mode, link) representing
967
      the NICs the instance has
968
  @type disk_template: string
969
  @param disk_template: the disk template of the instance
970
  @type disks: list
971
  @param disks: the list of (size, mode) pairs
972
  @type bep: dict
973
  @param bep: the backend parameters for the instance
974
  @type hvp: dict
975
  @param hvp: the hypervisor parameters for the instance
976
  @type hypervisor_name: string
977
  @param hypervisor_name: the hypervisor for the instance
978
  @type tags: list
979
  @param tags: list of instance tags as strings
980
  @rtype: dict
981
  @return: the hook environment for this instance
982

983
  """
984
  if status:
985
    str_status = "up"
986
  else:
987
    str_status = "down"
988
  env = {
989
    "OP_TARGET": name,
990
    "INSTANCE_NAME": name,
991
    "INSTANCE_PRIMARY": primary_node,
992
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
993
    "INSTANCE_OS_TYPE": os_type,
994
    "INSTANCE_STATUS": str_status,
995
    "INSTANCE_MEMORY": memory,
996
    "INSTANCE_VCPUS": vcpus,
997
    "INSTANCE_DISK_TEMPLATE": disk_template,
998
    "INSTANCE_HYPERVISOR": hypervisor_name,
999
  }
1000

    
1001
  if nics:
1002
    nic_count = len(nics)
1003
    for idx, (ip, mac, mode, link) in enumerate(nics):
1004
      if ip is None:
1005
        ip = ""
1006
      env["INSTANCE_NIC%d_IP" % idx] = ip
1007
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1008
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1009
      env["INSTANCE_NIC%d_LINK" % idx] = link
1010
      if mode == constants.NIC_MODE_BRIDGED:
1011
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1012
  else:
1013
    nic_count = 0
1014

    
1015
  env["INSTANCE_NIC_COUNT"] = nic_count
1016

    
1017
  if disks:
1018
    disk_count = len(disks)
1019
    for idx, (size, mode) in enumerate(disks):
1020
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1021
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1022
  else:
1023
    disk_count = 0
1024

    
1025
  env["INSTANCE_DISK_COUNT"] = disk_count
1026

    
1027
  if not tags:
1028
    tags = []
1029

    
1030
  env["INSTANCE_TAGS"] = " ".join(tags)
1031

    
1032
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1033
    for key, value in source.items():
1034
      env["INSTANCE_%s_%s" % (kind, key)] = value
1035

    
1036
  return env
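
# The resulting environment is flat; a single-NIC, single-disk instance ends
# up with keys such as (values and parameter names illustrative):
#
#   INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_STATUS,
#   INSTANCE_NIC_COUNT, INSTANCE_NIC0_MAC, INSTANCE_NIC0_MODE,
#   INSTANCE_DISK_COUNT, INSTANCE_DISK0_SIZE, INSTANCE_BE_memory, ...
#
# The hooks runner later prefixes every key with "GANETI_".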
1037

    
1038

    
1039
def _NICListToTuple(lu, nics):
1040
  """Build a list of nic information tuples.
1041

1042
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1043
  value in LUInstanceQueryData.
1044

1045
  @type lu:  L{LogicalUnit}
1046
  @param lu: the logical unit on whose behalf we execute
1047
  @type nics: list of L{objects.NIC}
1048
  @param nics: list of nics to convert to hooks tuples
1049

1050
  """
1051
  hooks_nics = []
1052
  cluster = lu.cfg.GetClusterInfo()
1053
  for nic in nics:
1054
    ip = nic.ip
1055
    mac = nic.mac
1056
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1057
    mode = filled_params[constants.NIC_MODE]
1058
    link = filled_params[constants.NIC_LINK]
1059
    hooks_nics.append((ip, mac, mode, link))
1060
  return hooks_nics
1061

    
1062

    
1063
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1064
  """Builds instance related env variables for hooks from an object.
1065

1066
  @type lu: L{LogicalUnit}
1067
  @param lu: the logical unit on whose behalf we execute
1068
  @type instance: L{objects.Instance}
1069
  @param instance: the instance for which we should build the
1070
      environment
1071
  @type override: dict
1072
  @param override: dictionary with key/values that will override
1073
      our values
1074
  @rtype: dict
1075
  @return: the hook environment dictionary
1076

1077
  """
1078
  cluster = lu.cfg.GetClusterInfo()
1079
  bep = cluster.FillBE(instance)
1080
  hvp = cluster.FillHV(instance)
1081
  args = {
1082
    "name": instance.name,
1083
    "primary_node": instance.primary_node,
1084
    "secondary_nodes": instance.secondary_nodes,
1085
    "os_type": instance.os,
1086
    "status": instance.admin_up,
1087
    "memory": bep[constants.BE_MEMORY],
1088
    "vcpus": bep[constants.BE_VCPUS],
1089
    "nics": _NICListToTuple(lu, instance.nics),
1090
    "disk_template": instance.disk_template,
1091
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1092
    "bep": bep,
1093
    "hvp": hvp,
1094
    "hypervisor_name": instance.hypervisor,
1095
    "tags": instance.tags,
1096
  }
1097
  if override:
1098
    args.update(override)
1099
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1100

    
1101

    
1102
def _AdjustCandidatePool(lu, exceptions):
1103
  """Adjust the candidate pool after node operations.
1104

1105
  """
1106
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1107
  if mod_list:
1108
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1109
               utils.CommaJoin(node.name for node in mod_list))
1110
    for name in mod_list:
1111
      lu.context.ReaddNode(name)
1112
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1113
  if mc_now > mc_max:
1114
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1115
               (mc_now, mc_max))
1116

    
1117

    
1118
def _DecideSelfPromotion(lu, exceptions=None):
1119
  """Decide whether I should promote myself as a master candidate.
1120

1121
  """
1122
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1123
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1124
  # the new node will increase mc_max with one, so:
1125
  mc_should = min(mc_should + 1, cp_size)
1126
  return mc_now < mc_should
1127

    
1128

    
1129
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1130
  """Check that the brigdes needed by a list of nics exist.
1131

1132
  """
1133
  cluster = lu.cfg.GetClusterInfo()
1134
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1135
  brlist = [params[constants.NIC_LINK] for params in paramslist
1136
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1137
  if brlist:
1138
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1139
    result.Raise("Error checking bridges on destination node '%s'" %
1140
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1141

    
1142

    
1143
def _CheckInstanceBridgesExist(lu, instance, node=None):
1144
  """Check that the brigdes needed by an instance exist.
1145

1146
  """
1147
  if node is None:
1148
    node = instance.primary_node
1149
  _CheckNicsBridgesExist(lu, instance.nics, node)
1150

    
1151

    
1152
def _CheckOSVariant(os_obj, name):
1153
  """Check whether an OS name conforms to the os variants specification.
1154

1155
  @type os_obj: L{objects.OS}
1156
  @param os_obj: OS object to check
1157
  @type name: string
1158
  @param name: OS name passed by the user, to check for validity
1159

1160
  """
1161
  variant = objects.OS.GetVariant(name)
1162
  if not os_obj.supported_variants:
1163
    if variant:
1164
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1165
                                 " passed)" % (os_obj.name, variant),
1166
                                 errors.ECODE_INVAL)
1167
    return
1168
  if not variant:
1169
    raise errors.OpPrereqError("OS name must include a variant",
1170
                               errors.ECODE_INVAL)
1171

    
1172
  if variant not in os_obj.supported_variants:
1173
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1174

    
1175

    
1176
def _GetNodeInstancesInner(cfg, fn):
1177
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1178

    
1179

    
1180
def _GetNodeInstances(cfg, node_name):
1181
  """Returns a list of all primary and secondary instances on a node.
1182

1183
  """
1184

    
1185
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1186

    
1187

    
1188
def _GetNodePrimaryInstances(cfg, node_name):
1189
  """Returns primary instances on a node.
1190

1191
  """
1192
  return _GetNodeInstancesInner(cfg,
1193
                                lambda inst: node_name == inst.primary_node)
1194

    
1195

    
1196
def _GetNodeSecondaryInstances(cfg, node_name):
1197
  """Returns secondary instances on a node.
1198

1199
  """
1200
  return _GetNodeInstancesInner(cfg,
1201
                                lambda inst: node_name in inst.secondary_nodes)
1202

    
1203

    
1204
def _GetStorageTypeArgs(cfg, storage_type):
1205
  """Returns the arguments for a storage type.
1206

1207
  """
1208
  # Special case for file storage
1209
  if storage_type == constants.ST_FILE:
1210
    # storage.FileStorage wants a list of storage directories
1211
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1212

    
1213
  return []
1214

    
1215

    
1216
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1217
  faulty = []
1218

    
1219
  for dev in instance.disks:
1220
    cfg.SetDiskID(dev, node_name)
1221

    
1222
  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1223
  result.Raise("Failed to get disk status from node %s" % node_name,
1224
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1225

    
1226
  for idx, bdev_status in enumerate(result.payload):
1227
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1228
      faulty.append(idx)
1229

    
1230
  return faulty
1231

    
1232

    
1233
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1234
  """Check the sanity of iallocator and node arguments and use the
1235
  cluster-wide iallocator if appropriate.
1236

1237
  Check that at most one of (iallocator, node) is specified. If none is
1238
  specified, then the LU's opcode's iallocator slot is filled with the
1239
  cluster-wide default iallocator.
1240

1241
  @type iallocator_slot: string
1242
  @param iallocator_slot: the name of the opcode iallocator slot
1243
  @type node_slot: string
1244
  @param node_slot: the name of the opcode target node slot
1245

1246
  """
1247
  node = getattr(lu.op, node_slot, None)
1248
  iallocator = getattr(lu.op, iallocator_slot, None)
1249

    
1250
  if node is not None and iallocator is not None:
1251
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1252
                               errors.ECODE_INVAL)
1253
  elif node is None and iallocator is None:
1254
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1255
    if default_iallocator:
1256
      setattr(lu.op, iallocator_slot, default_iallocator)
1257
    else:
1258
      raise errors.OpPrereqError("No iallocator or node given and no"
1259
                                 " cluster-wide default iallocator found;"
1260
                                 " please specify either an iallocator or a"
1261
                                 " node, or set a cluster-wide default"
1262
                                 " iallocator")
1263

    
1264

    
1265
def _GetDefaultIAllocator(cfg, iallocator):
1266
  """Decides on which iallocator to use.
1267

1268
  @type cfg: L{config.ConfigWriter}
1269
  @param cfg: Cluster configuration object
1270
  @type iallocator: string or None
1271
  @param iallocator: Iallocator specified in opcode
1272
  @rtype: string
1273
  @return: Iallocator name
1274

1275
  """
1276
  if not iallocator:
1277
    # Use default iallocator
1278
    iallocator = cfg.GetDefaultIAllocator()
1279

    
1280
  if not iallocator:
1281
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1282
                               " opcode nor as a cluster-wide default",
1283
                               errors.ECODE_INVAL)
1284

    
1285
  return iallocator
1286

    
1287

    
1288
class LUClusterPostInit(LogicalUnit):
1289
  """Logical unit for running hooks after cluster initialization.
1290

1291
  """
1292
  HPATH = "cluster-init"
1293
  HTYPE = constants.HTYPE_CLUSTER
1294

    
1295
  def BuildHooksEnv(self):
1296
    """Build hooks env.
1297

1298
    """
1299
    return {
1300
      "OP_TARGET": self.cfg.GetClusterName(),
1301
      }
1302

    
1303
  def BuildHooksNodes(self):
1304
    """Build hooks nodes.
1305

1306
    """
1307
    return ([], [self.cfg.GetMasterNode()])
1308

    
1309
  def Exec(self, feedback_fn):
1310
    """Nothing to do.
1311

1312
    """
1313
    return True
1314

    
1315

    
1316
class LUClusterDestroy(LogicalUnit):
1317
  """Logical unit for destroying the cluster.
1318

1319
  """
1320
  HPATH = "cluster-destroy"
1321
  HTYPE = constants.HTYPE_CLUSTER
1322

    
1323
  def BuildHooksEnv(self):
1324
    """Build hooks env.
1325

1326
    """
1327
    return {
1328
      "OP_TARGET": self.cfg.GetClusterName(),
1329
      }
1330

    
1331
  def BuildHooksNodes(self):
1332
    """Build hooks nodes.
1333

1334
    """
1335
    return ([], [])
1336

    
1337
  def CheckPrereq(self):
1338
    """Check prerequisites.
1339

1340
    This checks whether the cluster is empty.
1341

1342
    Any errors are signaled by raising errors.OpPrereqError.
1343

1344
    """
1345
    master = self.cfg.GetMasterNode()
1346

    
1347
    nodelist = self.cfg.GetNodeList()
1348
    if len(nodelist) != 1 or nodelist[0] != master:
1349
      raise errors.OpPrereqError("There are still %d node(s) in"
1350
                                 " this cluster." % (len(nodelist) - 1),
1351
                                 errors.ECODE_INVAL)
1352
    instancelist = self.cfg.GetInstanceList()
1353
    if instancelist:
1354
      raise errors.OpPrereqError("There are still %d instance(s) in"
1355
                                 " this cluster." % len(instancelist),
1356
                                 errors.ECODE_INVAL)
1357

    
1358
  def Exec(self, feedback_fn):
1359
    """Destroys the cluster.
1360

1361
    """
1362
    master_params = self.cfg.GetMasterNetworkParameters()
1363

    
1364
    # Run post hooks on master node before it's removed
1365
    _RunPostHook(self, master_params.name)
1366

    
1367
    ems = self.cfg.GetUseExternalMipScript()
1368
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1369
                                                     master_params, ems)
1370
    result.Raise("Could not disable the master role")
1371

    
1372
    return master_params.name
1373

    
1374

    
1375
def _VerifyCertificate(filename):
1376
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1377

1378
  @type filename: string
1379
  @param filename: Path to PEM file
1380

1381
  """
1382
  try:
1383
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1384
                                           utils.ReadFile(filename))
1385
  except Exception, err: # pylint: disable=W0703
1386
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1387
            "Failed to load X509 certificate %s: %s" % (filename, err))
1388

    
1389
  (errcode, msg) = \
1390
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1391
                                constants.SSL_CERT_EXPIRATION_ERROR)
1392

    
1393
  if msg:
1394
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1395
  else:
1396
    fnamemsg = None
1397

    
1398
  if errcode is None:
1399
    return (None, fnamemsg)
1400
  elif errcode == utils.CERT_WARNING:
1401
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1402
  elif errcode == utils.CERT_ERROR:
1403
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1404

    
1405
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1406

    
1407

    
1408
def _GetAllHypervisorParameters(cluster, instances):
1409
  """Compute the set of all hypervisor parameters.
1410

1411
  @type cluster: L{objects.Cluster}
1412
  @param cluster: the cluster object
1413
  @param instances: list of L{objects.Instance}
1414
  @param instances: additional instances from which to obtain parameters
1415
  @rtype: list of (origin, hypervisor, parameters)
1416
  @return: a list with all parameters found, indicating the hypervisor they
1417
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1418

1419
  """
1420
  hvp_data = []
1421

    
1422
  for hv_name in cluster.enabled_hypervisors:
1423
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1424

    
1425
  for os_name, os_hvp in cluster.os_hvp.items():
1426
    for hv_name, hv_params in os_hvp.items():
1427
      if hv_params:
1428
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1429
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1430

    
1431
  # TODO: collapse identical parameter values in a single one
1432
  for instance in instances:
1433
    if instance.hvparams:
1434
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1435
                       cluster.FillHV(instance)))
1436

    
1437
  return hvp_data
1438

    
1439

    
1440
class _VerifyErrors(object):
1441
  """Mix-in for cluster/group verify LUs.
1442

1443
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1444
  self.op and self._feedback_fn to be available.)
1445

1446
  """
1447

    
1448
  ETYPE_FIELD = "code"
1449
  ETYPE_ERROR = "ERROR"
1450
  ETYPE_WARNING = "WARNING"
1451

    
1452
  def _Error(self, ecode, item, msg, *args, **kwargs):
1453
    """Format an error message.
1454

1455
    Based on the opcode's error_codes parameter, either format a
1456
    parseable error code, or a simpler error string.
1457

1458
    This must be called only from Exec and functions called from Exec.
1459

1460
    """
1461
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1462
    itype, etxt, _ = ecode
1463
    # first complete the msg
1464
    if args:
1465
      msg = msg % args
1466
    # then format the whole message
1467
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1468
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1469
    else:
1470
      if item:
1471
        item = " " + item
1472
      else:
1473
        item = ""
1474
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1475
    # and finally report it via the feedback_fn
1476
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1477

    
1478
  def _ErrorIf(self, cond, ecode, *args, **kwargs):
1479
    """Log an error message if the passed condition is True.
1480

1481
    """
1482
    cond = (bool(cond)
1483
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1484

    
1485
    # If the error code is in the list of ignored errors, demote the error to a
1486
    # warning
1487
    (_, etxt, _) = ecode
1488
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1489
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1490

    
1491
    if cond:
1492
      self._Error(ecode, *args, **kwargs)
1493

    
1494
    # do not mark the operation as failed for WARN cases only
1495
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1496
      self.bad = self.bad or cond
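
  # Usage sketch from a verify LU's Exec (the error code constant is an
  # assumption; any CV_* tuple from constants works the same way):
  #
  #   self._ErrorIf(test_failed, constants.CV_ENODESSH, node_name,
  #                 "ssh communication with node '%s' failed", node_name)
  #
  # With opcode error_codes set this is reported in the parseable
  # "<type>:<code>:<item-type>:<item>:<message>" form, otherwise as plain
  # text.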
1497

    
1498

    
1499
class LUClusterVerify(NoHooksLU):
1500
  """Submits all jobs necessary to verify the cluster.
1501

1502
  """
1503
  REQ_BGL = False
1504

    
1505
  def ExpandNames(self):
1506
    self.needed_locks = {}
1507

    
1508
  def Exec(self, feedback_fn):
1509
    jobs = []
1510

    
1511
    if self.op.group_name:
1512
      groups = [self.op.group_name]
1513
      depends_fn = lambda: None
1514
    else:
1515
      groups = self.cfg.GetNodeGroupList()
1516

    
1517
      # Verify global configuration
1518
      jobs.append([
1519
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1520
        ])
1521

    
1522
      # Always depend on global verification
1523
      depends_fn = lambda: [(-len(jobs), [])]
1524

    
1525
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1526
                                            ignore_errors=self.op.ignore_errors,
1527
                                            depends=depends_fn())]
1528
                for group in groups)
1529

    
1530
    # Fix up all parameters
1531
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1532
      op.debug_simulate_errors = self.op.debug_simulate_errors
1533
      op.verbose = self.op.verbose
1534
      op.error_codes = self.op.error_codes
1535
      try:
1536
        op.skip_checks = self.op.skip_checks
1537
      except AttributeError:
1538
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1539

    
1540
    return ResultWithJobs(jobs)
1541

    
1542

    
1543
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    # dangling_nodes contains node names, not node objects
    pretty_dangling = [
        "%s (%s)" %
        (node,
         utils.CommaJoin(dangling_instances.get(node,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad

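
# Minimal editorial sketch (plain dicts and sets instead of config objects) of
# the dangling-node check performed in LUClusterVerifyConfig.Exec above: a
# node is dangling when it refers to a group UUID that no longer exists.
def _ExampleFindDanglingNodes(node_to_group, group_uuids):
  """@param node_to_group: dict of node name to group UUID
  @param group_uuids: set of known group UUIDs

  """
  return set(name for (name, group) in node_to_group.items()
             if group not in group_uuids)
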
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1634
  """Verifies the status of a node group.
1635

1636
  """
1637
  HPATH = "cluster-verify"
1638
  HTYPE = constants.HTYPE_CLUSTER
1639
  REQ_BGL = False
1640

    
1641
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1642

    
1643
  class NodeImage(object):
1644
    """A class representing the logical and physical status of a node.
1645

1646
    @type name: string
1647
    @ivar name: the node name to which this object refers
1648
    @ivar volumes: a structure as returned from
1649
        L{ganeti.backend.GetVolumeList} (runtime)
1650
    @ivar instances: a list of running instances (runtime)
1651
    @ivar pinst: list of configured primary instances (config)
1652
    @ivar sinst: list of configured secondary instances (config)
1653
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1654
        instances for which this node is secondary (config)
1655
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1656
    @ivar dfree: free disk, as reported by the node (runtime)
1657
    @ivar offline: the offline status (config)
1658
    @type rpc_fail: boolean
1659
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1660
        not whether the individual keys were correct) (runtime)
1661
    @type lvm_fail: boolean
1662
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1663
    @type hyp_fail: boolean
1664
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1665
    @type ghost: boolean
1666
    @ivar ghost: whether this is a known node or not (config)
1667
    @type os_fail: boolean
1668
    @ivar os_fail: whether the RPC call didn't return valid OS data
1669
    @type oslist: list
1670
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1671
    @type vm_capable: boolean
1672
    @ivar vm_capable: whether the node can host instances
1673

1674
    """
1675
    def __init__(self, offline=False, name=None, vm_capable=True):
1676
      self.name = name
1677
      self.volumes = {}
1678
      self.instances = []
1679
      self.pinst = []
1680
      self.sinst = []
1681
      self.sbp = {}
1682
      self.mfree = 0
1683
      self.dfree = 0
1684
      self.offline = offline
1685
      self.vm_capable = vm_capable
1686
      self.rpc_fail = False
1687
      self.lvm_fail = False
1688
      self.hyp_fail = False
1689
      self.ghost = False
1690
      self.os_fail = False
1691
      self.oslist = {}
1692

    
1693
  def ExpandNames(self):
1694
    # This raises errors.OpPrereqError on its own:
1695
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1696

    
1697
    # Get instances in node group; this is unsafe and needs verification later
1698
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1699

    
1700
    self.needed_locks = {
1701
      locking.LEVEL_INSTANCE: inst_names,
1702
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1703
      locking.LEVEL_NODE: [],
1704
      }
1705

    
1706
    self.share_locks = _ShareAll()
1707

    
1708
  def DeclareLocks(self, level):
1709
    if level == locking.LEVEL_NODE:
1710
      # Get members of node group; this is unsafe and needs verification later
1711
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1712

    
1713
      all_inst_info = self.cfg.GetAllInstancesInfo()
1714

    
1715
      # In Exec(), we warn about mirrored instances that have primary and
1716
      # secondary living in separate node groups. To fully verify that
1717
      # volumes for these instances are healthy, we will need to do an
1718
      # extra call to their secondaries. We ensure here those nodes will
1719
      # be locked.
1720
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1721
        # Important: access only the instances whose lock is owned
1722
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1723
          nodes.update(all_inst_info[inst].secondary_nodes)
1724

    
1725
      self.needed_locks[locking.LEVEL_NODE] = nodes
1726

    
1727
  def CheckPrereq(self):
1728
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1729
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1730

    
1731
    group_nodes = set(self.group_info.members)
1732
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1733

    
1734
    unlocked_nodes = \
1735
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1736

    
1737
    unlocked_instances = \
1738
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1739

    
1740
    if unlocked_nodes:
1741
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1742
                                 utils.CommaJoin(unlocked_nodes))
1743

    
1744
    if unlocked_instances:
1745
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1746
                                 utils.CommaJoin(unlocked_instances))
1747

    
1748
    self.all_node_info = self.cfg.GetAllNodesInfo()
1749
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1750

    
1751
    self.my_node_names = utils.NiceSort(group_nodes)
1752
    self.my_inst_names = utils.NiceSort(group_instances)
1753

    
1754
    self.my_node_info = dict((name, self.all_node_info[name])
1755
                             for name in self.my_node_names)
1756

    
1757
    self.my_inst_info = dict((name, self.all_inst_info[name])
1758
                             for name in self.my_inst_names)
1759

    
1760
    # We detect here the nodes that will need the extra RPC calls for verifying
1761
    # split LV volumes; they should be locked.
1762
    extra_lv_nodes = set()
1763

    
1764
    for inst in self.my_inst_info.values():
1765
      if inst.disk_template in constants.DTS_INT_MIRROR:
1766
        group = self.my_node_info[inst.primary_node].group
1767
        for nname in inst.secondary_nodes:
1768
          if self.all_node_info[nname].group != group:
1769
            extra_lv_nodes.add(nname)
1770

    
1771
    unlocked_lv_nodes = \
1772
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1773

    
1774
    if unlocked_lv_nodes:
1775
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1776
                                 utils.CommaJoin(unlocked_lv_nodes))
1777
    self.extra_lv_nodes = list(extra_lv_nodes)
1778

    
1779
  def _VerifyNode(self, ninfo, nresult):
1780
    """Perform some basic validation on data returned from a node.
1781

1782
      - check the result data structure is well formed and has all the
1783
        mandatory fields
1784
      - check ganeti version
1785

1786
    @type ninfo: L{objects.Node}
1787
    @param ninfo: the node to check
1788
    @param nresult: the results from the node
1789
    @rtype: boolean
1790
    @return: whether overall this call was successful (and we can expect
1791
         reasonable values in the response)
1792

1793
    """
1794
    node = ninfo.name
1795
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1796

    
1797
    # main result, nresult should be a non-empty dict
1798
    test = not nresult or not isinstance(nresult, dict)
1799
    _ErrorIf(test, constants.CV_ENODERPC, node,
1800
                  "unable to verify node: no data returned")
1801
    if test:
1802
      return False
1803

    
1804
    # compares ganeti version
1805
    local_version = constants.PROTOCOL_VERSION
1806
    remote_version = nresult.get("version", None)
1807
    test = not (remote_version and
1808
                isinstance(remote_version, (list, tuple)) and
1809
                len(remote_version) == 2)
1810
    _ErrorIf(test, constants.CV_ENODERPC, node,
1811
             "connection to node returned invalid data")
1812
    if test:
1813
      return False
1814

    
1815
    test = local_version != remote_version[0]
1816
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
1817
             "incompatible protocol versions: master %s,"
1818
             " node %s", local_version, remote_version[0])
1819
    if test:
1820
      return False
1821

    
1822
    # node seems compatible, we can actually try to look into its results
1823

    
1824
    # full package version
1825
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1826
                  constants.CV_ENODEVERSION, node,
1827
                  "software version mismatch: master %s, node %s",
1828
                  constants.RELEASE_VERSION, remote_version[1],
1829
                  code=self.ETYPE_WARNING)
1830

    
1831
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1832
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1833
      for hv_name, hv_result in hyp_result.iteritems():
1834
        test = hv_result is not None
1835
        _ErrorIf(test, constants.CV_ENODEHV, node,
1836
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1837

    
1838
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1839
    if ninfo.vm_capable and isinstance(hvp_result, list):
1840
      for item, hv_name, hv_result in hvp_result:
1841
        _ErrorIf(True, constants.CV_ENODEHV, node,
1842
                 "hypervisor %s parameter verify failure (source %s): %s",
1843
                 hv_name, item, hv_result)
1844

    
1845
    test = nresult.get(constants.NV_NODESETUP,
1846
                       ["Missing NODESETUP results"])
1847
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1848
             "; ".join(test))
1849

    
1850
    return True
1851

    
1852
  def _VerifyNodeTime(self, ninfo, nresult,
1853
                      nvinfo_starttime, nvinfo_endtime):
1854
    """Check the node time.
1855

1856
    @type ninfo: L{objects.Node}
1857
    @param ninfo: the node to check
1858
    @param nresult: the remote results for the node
1859
    @param nvinfo_starttime: the start time of the RPC call
1860
    @param nvinfo_endtime: the end time of the RPC call
1861

1862
    """
1863
    node = ninfo.name
1864
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1865

    
1866
    ntime = nresult.get(constants.NV_TIME, None)
1867
    try:
1868
      ntime_merged = utils.MergeTime(ntime)
1869
    except (ValueError, TypeError):
1870
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1871
      return
1872

    
1873
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1874
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1875
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1876
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1877
    else:
1878
      ntime_diff = None
1879

    
1880
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1881
             "Node time diverges by at least %s from master node time",
1882
             ntime_diff)
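
  # Editorial sketch of the acceptance window used by _VerifyNodeTime above.
  @staticmethod
  def _ExampleClockSkewOk(node_time, rpc_start, rpc_end,
                          max_skew=constants.NODE_MAX_CLOCK_SKEW):
    """Illustrative helper only (not called by Ganeti).

    The reported node time is acceptable when it falls inside the RPC call
    window, widened by the allowed clock skew on both sides.

    """
    return (rpc_start - max_skew) <= node_time <= (rpc_end + max_skew)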
1883

    
1884
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1885
    """Check the node LVM results.
1886

1887
    @type ninfo: L{objects.Node}
1888
    @param ninfo: the node to check
1889
    @param nresult: the remote results for the node
1890
    @param vg_name: the configured VG name
1891

1892
    """
1893
    if vg_name is None:
1894
      return
1895

    
1896
    node = ninfo.name
1897
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1898

    
1899
    # checks vg existence and size > 20G
1900
    vglist = nresult.get(constants.NV_VGLIST, None)
1901
    test = not vglist
1902
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1903
    if not test:
1904
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1905
                                            constants.MIN_VG_SIZE)
1906
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1907

    
1908
    # check pv names
1909
    pvlist = nresult.get(constants.NV_PVLIST, None)
1910
    test = pvlist is None
1911
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1912
    if not test:
1913
      # check that ':' is not present in PV names, since it's a
1914
      # special character for lvcreate (denotes the range of PEs to
1915
      # use on the PV)
1916
      for _, pvname, owner_vg in pvlist:
1917
        test = ":" in pvname
1918
        _ErrorIf(test, constants.CV_ENODELVM, node,
1919
                 "Invalid character ':' in PV '%s' of VG '%s'",
1920
                 pvname, owner_vg)
1921

    
1922
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1923
    """Check the node bridges.
1924

1925
    @type ninfo: L{objects.Node}
1926
    @param ninfo: the node to check
1927
    @param nresult: the remote results for the node
1928
    @param bridges: the expected list of bridges
1929

1930
    """
1931
    if not bridges:
1932
      return
1933

    
1934
    node = ninfo.name
1935
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1936

    
1937
    missing = nresult.get(constants.NV_BRIDGES, None)
1938
    test = not isinstance(missing, list)
1939
    _ErrorIf(test, constants.CV_ENODENET, node,
1940
             "did not return valid bridge information")
1941
    if not test:
1942
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1943
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1944

    
1945
  def _VerifyNodeUserScripts(self, ninfo, nresult):
1946
    """Check the results of user scripts presence and executability on the node
1947

1948
    @type ninfo: L{objects.Node}
1949
    @param ninfo: the node to check
1950
    @param nresult: the remote results for the node
1951

1952
    """
1953
    node = ninfo.name
1954

    
1955
    test = not constants.NV_USERSCRIPTS in nresult
1956
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
1957
                  "did not return user scripts information")
1958

    
1959
    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
1960
    if not test:
1961
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
1962
                    "user scripts not present or not executable: %s" %
1963
                    utils.CommaJoin(sorted(broken_scripts)))
1964

    
1965
  def _VerifyNodeNetwork(self, ninfo, nresult):
1966
    """Check the node network connectivity results.
1967

1968
    @type ninfo: L{objects.Node}
1969
    @param ninfo: the node to check
1970
    @param nresult: the remote results for the node
1971

1972
    """
1973
    node = ninfo.name
1974
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1975

    
1976
    test = constants.NV_NODELIST not in nresult
1977
    _ErrorIf(test, constants.CV_ENODESSH, node,
1978
             "node hasn't returned node ssh connectivity data")
1979
    if not test:
1980
      if nresult[constants.NV_NODELIST]:
1981
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1982
          _ErrorIf(True, constants.CV_ENODESSH, node,
1983
                   "ssh communication with node '%s': %s", a_node, a_msg)
1984

    
1985
    test = constants.NV_NODENETTEST not in nresult
1986
    _ErrorIf(test, constants.CV_ENODENET, node,
1987
             "node hasn't returned node tcp connectivity data")
1988
    if not test:
1989
      if nresult[constants.NV_NODENETTEST]:
1990
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1991
        for anode in nlist:
1992
          _ErrorIf(True, constants.CV_ENODENET, node,
1993
                   "tcp communication with node '%s': %s",
1994
                   anode, nresult[constants.NV_NODENETTEST][anode])
1995

    
1996
    test = constants.NV_MASTERIP not in nresult
1997
    _ErrorIf(test, constants.CV_ENODENET, node,
1998
             "node hasn't returned node master IP reachability data")
1999
    if not test:
2000
      if not nresult[constants.NV_MASTERIP]:
2001
        if node == self.master_node:
2002
          msg = "the master node cannot reach the master IP (not configured?)"
2003
        else:
2004
          msg = "cannot reach the master IP"
2005
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
2006

    
2007
  def _VerifyInstance(self, instance, instanceconfig, node_image,
2008
                      diskstatus):
2009
    """Verify an instance.
2010

2011
    This function checks to see if the required block devices are
2012
    available on the instance's node.
2013

2014
    """
2015
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2016
    node_current = instanceconfig.primary_node
2017

    
2018
    node_vol_should = {}
2019
    instanceconfig.MapLVsByNode(node_vol_should)
2020

    
2021
    for node in node_vol_should:
2022
      n_img = node_image[node]
2023
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2024
        # ignore missing volumes on offline or broken nodes
2025
        continue
2026
      for volume in node_vol_should[node]:
2027
        test = volume not in n_img.volumes
2028
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2029
                 "volume %s missing on node %s", volume, node)
2030

    
2031
    if instanceconfig.admin_up:
2032
      pri_img = node_image[node_current]
2033
      test = instance not in pri_img.instances and not pri_img.offline
2034
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2035
               "instance not running on its primary node %s",
2036
               node_current)
2037

    
2038
    diskdata = [(nname, success, status, idx)
2039
                for (nname, disks) in diskstatus.items()
2040
                for idx, (success, status) in enumerate(disks)]
2041

    
2042
    for nname, success, bdev_status, idx in diskdata:
2043
      # the 'ghost node' construction in Exec() ensures that we have a
2044
      # node here
2045
      snode = node_image[nname]
2046
      bad_snode = snode.ghost or snode.offline
2047
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2048
               constants.CV_EINSTANCEFAULTYDISK, instance,
2049
               "couldn't retrieve status for disk/%s on %s: %s",
2050
               idx, nname, bdev_status)
2051
      _ErrorIf((instanceconfig.admin_up and success and
2052
                bdev_status.ldisk_status == constants.LDS_FAULTY),
2053
               constants.CV_EINSTANCEFAULTYDISK, instance,
2054
               "disk/%s on %s is faulty", idx, nname)
2055

    
2056
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2057
    """Verify if there are any unknown volumes in the cluster.
2058

2059
    The .os, .swap and backup volumes are ignored. All other volumes are
2060
    reported as unknown.
2061

2062
    @type reserved: L{ganeti.utils.FieldSet}
2063
    @param reserved: a FieldSet of reserved volume names
2064

2065
    """
2066
    for node, n_img in node_image.items():
2067
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2068
        # skip non-healthy nodes
2069
        continue
2070
      for volume in n_img.volumes:
2071
        test = ((node not in node_vol_should or
2072
                volume not in node_vol_should[node]) and
2073
                not reserved.Matches(volume))
2074
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2075
                      "volume %s is unknown", volume)
2076

    
2077
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2078
    """Verify N+1 Memory Resilience.
2079

2080
    Check that if one single node dies we can still start all the
2081
    instances it was primary for.
2082

2083
    """
2084
    cluster_info = self.cfg.GetClusterInfo()
2085
    for node, n_img in node_image.items():
2086
      # This code checks that every node which is now listed as
2087
      # secondary has enough memory to host all instances it is
2088
      # supposed to should a single other node in the cluster fail.
2089
      # FIXME: not ready for failover to an arbitrary node
2090
      # FIXME: does not support file-backed instances
2091
      # WARNING: we currently take into account down instances as well
2092
      # as up ones, considering that even if they're down someone
2093
      # might want to start them even in the event of a node failure.
2094
      if n_img.offline:
2095
        # we're skipping offline nodes from the N+1 warning, since
2096
        # most likely we don't have good memory information from them;
2097
        # we already list instances living on such nodes, and that's
2098
        # enough warning
2099
        continue
2100
      for prinode, instances in n_img.sbp.items():
2101
        needed_mem = 0
2102
        for instance in instances:
2103
          bep = cluster_info.FillBE(instance_cfg[instance])
2104
          if bep[constants.BE_AUTO_BALANCE]:
2105
            needed_mem += bep[constants.BE_MEMORY]
2106
        test = n_img.mfree < needed_mem
2107
        self._ErrorIf(test, constants.CV_ENODEN1, node,
2108
                      "not enough memory to accomodate instance failovers"
2109
                      " should node %s fail (%dMiB needed, %dMiB available)",
2110
                      prinode, needed_mem, n_img.mfree)
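
  # Editorial sketch of the N+1 rule applied above, with plain numbers.
  @staticmethod
  def _ExampleNPlusOneOk(mfree, instance_mem_and_autobalance):
    """Illustrative helper only (not called by Ganeti).

    A node passes for a given primary when its free memory covers the summed
    memory of that primary's auto-balanced instances.

    @param mfree: free memory on the candidate node, in MiB
    @param instance_mem_and_autobalance: list of (memory, auto_balance) pairs

    """
    needed = sum(mem for (mem, auto) in instance_mem_and_autobalance if auto)
    return mfree >= needed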
2111

    
2112
  @classmethod
2113
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2114
                   (files_all, files_opt, files_mc, files_vm)):
2115
    """Verifies file checksums collected from all nodes.
2116

2117
    @param errorif: Callback for reporting errors
2118
    @param nodeinfo: List of L{objects.Node} objects
2119
    @param master_node: Name of master node
2120
    @param all_nvinfo: RPC results
2121

2122
    """
2123
    # Define functions determining which nodes to consider for a file
2124
    files2nodefn = [
2125
      (files_all, None),
2126
      (files_mc, lambda node: (node.master_candidate or
2127
                               node.name == master_node)),
2128
      (files_vm, lambda node: node.vm_capable),
2129
      ]
2130

    
2131
    # Build mapping from filename to list of nodes which should have the file
2132
    nodefiles = {}
2133
    for (files, fn) in files2nodefn:
2134
      if fn is None:
2135
        filenodes = nodeinfo
2136
      else:
2137
        filenodes = filter(fn, nodeinfo)
2138
      nodefiles.update((filename,
2139
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2140
                       for filename in files)
2141

    
2142
    assert set(nodefiles) == (files_all | files_mc | files_vm)
2143

    
2144
    fileinfo = dict((filename, {}) for filename in nodefiles)
2145
    ignore_nodes = set()
2146

    
2147
    for node in nodeinfo:
2148
      if node.offline:
2149
        ignore_nodes.add(node.name)
2150
        continue
2151

    
2152
      nresult = all_nvinfo[node.name]
2153

    
2154
      if nresult.fail_msg or not nresult.payload:
2155
        node_files = None
2156
      else:
2157
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2158

    
2159
      test = not (node_files and isinstance(node_files, dict))
2160
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
2161
              "Node did not return file checksum data")
2162
      if test:
2163
        ignore_nodes.add(node.name)
2164
        continue
2165

    
2166
      # Build per-checksum mapping from filename to nodes having it
2167
      for (filename, checksum) in node_files.items():
2168
        assert filename in nodefiles
2169
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2170

    
2171
    for (filename, checksums) in fileinfo.items():
2172
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2173

    
2174
      # Nodes having the file
2175
      with_file = frozenset(node_name
2176
                            for nodes in fileinfo[filename].values()
2177
                            for node_name in nodes) - ignore_nodes
2178

    
2179
      expected_nodes = nodefiles[filename] - ignore_nodes
2180

    
2181
      # Nodes missing file
2182
      missing_file = expected_nodes - with_file
2183

    
2184
      if filename in files_opt:
2185
        # All or no nodes
2186
        errorif(missing_file and missing_file != expected_nodes,
2187
                constants.CV_ECLUSTERFILECHECK, None,
2188
                "File %s is optional, but it must exist on all or no"
2189
                " nodes (not found on %s)",
2190
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2191
      else:
2192
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2193
                "File %s is missing from node(s) %s", filename,
2194
                utils.CommaJoin(utils.NiceSort(missing_file)))
2195

    
2196
        # Warn if a node has a file it shouldn't
2197
        unexpected = with_file - expected_nodes
2198
        errorif(unexpected,
2199
                constants.CV_ECLUSTERFILECHECK, None,
2200
                "File %s should not exist on node(s) %s",
2201
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2202

    
2203
      # See if there are multiple versions of the file
2204
      test = len(checksums) > 1
2205
      if test:
2206
        variants = ["variant %s on %s" %
2207
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2208
                    for (idx, (checksum, nodes)) in
2209
                      enumerate(sorted(checksums.items()))]
2210
      else:
2211
        variants = []
2212

    
2213
      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2214
              "File %s found with %s different checksums (%s)",
2215
              filename, len(checksums), "; ".join(variants))
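
  # Editorial sketch of the rule applied above to optional files.
  @staticmethod
  def _ExampleOptionalFileOk(expected_nodes, nodes_with_file):
    """Illustrative helper only (not called by Ganeti).

    An optional file must be present either on every node expected to have it
    or on none of them.

    @param expected_nodes: set of node names expected to hold the file
    @param nodes_with_file: set of node names actually holding it

    """
    missing = expected_nodes - nodes_with_file
    return not missing or missing == expected_nodes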
2216

    
2217
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2218
                      drbd_map):
2219
    """Verifies and the node DRBD status.
2220

2221
    @type ninfo: L{objects.Node}
2222
    @param ninfo: the node to check
2223
    @param nresult: the remote results for the node
2224
    @param instanceinfo: the dict of instances
2225
    @param drbd_helper: the configured DRBD usermode helper
2226
    @param drbd_map: the DRBD map as returned by
2227
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2228

2229
    """
2230
    node = ninfo.name
2231
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2232

    
2233
    if drbd_helper:
2234
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2235
      test = (helper_result is None)
2236
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2237
               "no drbd usermode helper returned")
2238
      if helper_result:
2239
        status, payload = helper_result
2240
        test = not status
2241
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2242
                 "drbd usermode helper check unsuccessful: %s", payload)
2243
        test = status and (payload != drbd_helper)
2244
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2245
                 "wrong drbd usermode helper: %s", payload)
2246

    
2247
    # compute the DRBD minors
2248
    node_drbd = {}
2249
    for minor, instance in drbd_map[node].items():
2250
      test = instance not in instanceinfo
2251
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2252
               "ghost instance '%s' in temporary DRBD map", instance)
2253
        # ghost instance should not be running, but otherwise we
2254
        # don't give double warnings (both ghost instance and
2255
        # unallocated minor in use)
2256
      if test:
2257
        node_drbd[minor] = (instance, False)
2258
      else:
2259
        instance = instanceinfo[instance]
2260
        node_drbd[minor] = (instance.name, instance.admin_up)
2261

    
2262
    # and now check them
2263
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2264
    test = not isinstance(used_minors, (tuple, list))
2265
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
2266
             "cannot parse drbd status file: %s", str(used_minors))
2267
    if test:
2268
      # we cannot check drbd status
2269
      return
2270

    
2271
    for minor, (iname, must_exist) in node_drbd.items():
2272
      test = minor not in used_minors and must_exist
2273
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2274
               "drbd minor %d of instance %s is not active", minor, iname)
2275
    for minor in used_minors:
2276
      test = minor not in node_drbd
2277
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2278
               "unallocated drbd minor %d is in use", minor)
2279

    
2280
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2281
    """Builds the node OS structures.
2282

2283
    @type ninfo: L{objects.Node}
2284
    @param ninfo: the node to check
2285
    @param nresult: the remote results for the node
2286
    @param nimg: the node image object
2287

2288
    """
2289
    node = ninfo.name
2290
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2291

    
2292
    remote_os = nresult.get(constants.NV_OSLIST, None)
2293
    test = (not isinstance(remote_os, list) or
2294
            not compat.all(isinstance(v, list) and len(v) == 7
2295
                           for v in remote_os))
2296

    
2297
    _ErrorIf(test, constants.CV_ENODEOS, node,
2298
             "node hasn't returned valid OS data")
2299

    
2300
    nimg.os_fail = test
2301

    
2302
    if test:
2303
      return
2304

    
2305
    os_dict = {}
2306

    
2307
    for (name, os_path, status, diagnose,
2308
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2309

    
2310
      if name not in os_dict:
2311
        os_dict[name] = []
2312

    
2313
      # parameters is a list of lists instead of list of tuples due to
2314
      # JSON lacking a real tuple type, fix it:
2315
      parameters = [tuple(v) for v in parameters]
2316
      os_dict[name].append((os_path, status, diagnose,
2317
                            set(variants), set(parameters), set(api_ver)))
2318

    
2319
    nimg.oslist = os_dict
2320

    
2321
  def _VerifyNodeOS(self, ninfo, nimg, base):
2322
    """Verifies the node OS list.
2323

2324
    @type ninfo: L{objects.Node}
2325
    @param ninfo: the node to check
2326
    @param nimg: the node image object
2327
    @param base: the 'template' node we match against (e.g. from the master)
2328

2329
    """
2330
    node = ninfo.name
2331
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2332

    
2333
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2334

    
2335
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2336
    for os_name, os_data in nimg.oslist.items():
2337
      assert os_data, "Empty OS status for OS %s?!" % os_name
2338
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2339
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2340
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2341
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2342
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2343
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2344
      # comparisons with the 'base' image
2345
      test = os_name not in base.oslist
2346
      _ErrorIf(test, constants.CV_ENODEOS, node,
2347
               "Extra OS %s not present on reference node (%s)",
2348
               os_name, base.name)
2349
      if test:
2350
        continue
2351
      assert base.oslist[os_name], "Base node has empty OS status?"
2352
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2353
      if not b_status:
2354
        # base OS is invalid, skipping
2355
        continue
2356
      for kind, a, b in [("API version", f_api, b_api),
2357
                         ("variants list", f_var, b_var),
2358
                         ("parameters", beautify_params(f_param),
2359
                          beautify_params(b_param))]:
2360
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
2361
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2362
                 kind, os_name, base.name,
2363
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2364

    
2365
    # check any missing OSes
2366
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2367
    _ErrorIf(missing, constants.CV_ENODEOS, node,
2368
             "OSes present on reference node %s but missing on this node: %s",
2369
             base.name, utils.CommaJoin(missing))
2370

    
2371
  def _VerifyOob(self, ninfo, nresult):
2372
    """Verifies out of band functionality of a node.
2373

2374
    @type ninfo: L{objects.Node}
2375
    @param ninfo: the node to check
2376
    @param nresult: the remote results for the node
2377

2378
    """
2379
    node = ninfo.name
2380
    # We just have to verify the paths on master and/or master candidates
2381
    # as the oob helper is invoked on the master
2382
    if ((ninfo.master_candidate or ninfo.master_capable) and
2383
        constants.NV_OOB_PATHS in nresult):
2384
      for path_result in nresult[constants.NV_OOB_PATHS]:
2385
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2386

    
2387
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2388
    """Verifies and updates the node volume data.
2389

2390
    This function will update a L{NodeImage}'s internal structures
2391
    with data from the remote call.
2392

2393
    @type ninfo: L{objects.Node}
2394
    @param ninfo: the node to check
2395
    @param nresult: the remote results for the node
2396
    @param nimg: the node image object
2397
    @param vg_name: the configured VG name
2398

2399
    """
2400
    node = ninfo.name
2401
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2402

    
2403
    nimg.lvm_fail = True
2404
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2405
    if vg_name is None:
2406
      pass
2407
    elif isinstance(lvdata, basestring):
2408
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2409
               utils.SafeEncode(lvdata))
2410
    elif not isinstance(lvdata, dict):
2411
      _ErrorIf(True, constants.CV_ENODELVM, node,
2412
               "rpc call to node failed (lvlist)")
2413
    else:
2414
      nimg.volumes = lvdata
2415
      nimg.lvm_fail = False
2416

    
2417
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2418
    """Verifies and updates the node instance list.
2419

2420
    If the listing was successful, then updates this node's instance
2421
    list. Otherwise, it marks the RPC call as failed for the instance
2422
    list key.
2423

2424
    @type ninfo: L{objects.Node}
2425
    @param ninfo: the node to check
2426
    @param nresult: the remote results for the node
2427
    @param nimg: the node image object
2428

2429
    """
2430
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2431
    test = not isinstance(idata, list)
2432
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2433
                  "rpc call to node failed (instancelist): %s",
2434
                  utils.SafeEncode(str(idata)))
2435
    if test:
2436
      nimg.hyp_fail = True
2437
    else:
2438
      nimg.instances = idata
2439

    
2440
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2441
    """Verifies and computes a node information map
2442

2443
    @type ninfo: L{objects.Node}
2444
    @param ninfo: the node to check
2445
    @param nresult: the remote results for the node
2446
    @param nimg: the node image object
2447
    @param vg_name: the configured VG name
2448

2449
    """
2450
    node = ninfo.name
2451
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2452

    
2453
    # try to read free memory (from the hypervisor)
2454
    hv_info = nresult.get(constants.NV_HVINFO, None)
2455
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2456
    _ErrorIf(test, constants.CV_ENODEHV, node,
2457
             "rpc call to node failed (hvinfo)")
2458
    if not test:
2459
      try:
2460
        nimg.mfree = int(hv_info["memory_free"])
2461
      except (ValueError, TypeError):
2462
        _ErrorIf(True, constants.CV_ENODERPC, node,
2463
                 "node returned invalid nodeinfo, check hypervisor")
2464

    
2465
    # FIXME: devise a free space model for file based instances as well
2466
    if vg_name is not None:
2467
      test = (constants.NV_VGLIST not in nresult or
2468
              vg_name not in nresult[constants.NV_VGLIST])
2469
      _ErrorIf(test, constants.CV_ENODELVM, node,
2470
               "node didn't return data for the volume group '%s'"
2471
               " - it is either missing or broken", vg_name)
2472
      if not test:
2473
        try:
2474
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2475
        except (ValueError, TypeError):
2476
          _ErrorIf(True, constants.CV_ENODERPC, node,
2477
                   "node returned invalid LVM info, check LVM status")
2478

    
2479
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2480
    """Gets per-disk status information for all instances.
2481

2482
    @type nodelist: list of strings
2483
    @param nodelist: Node names
2484
    @type node_image: dict of (name, L{objects.Node})
2485
    @param node_image: Node objects
2486
    @type instanceinfo: dict of (name, L{objects.Instance})
2487
    @param instanceinfo: Instance objects
2488
    @rtype: {instance: {node: [(success, payload)]}}
2489
    @return: a dictionary of per-instance dictionaries with nodes as
2490
        keys and disk information as values; the disk information is a
2491
        list of tuples (success, payload)
2492

2493
    """
2494
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2495

    
2496
    node_disks = {}
2497
    node_disks_devonly = {}
2498
    diskless_instances = set()
2499
    diskless = constants.DT_DISKLESS
2500

    
2501
    for nname in nodelist:
2502
      node_instances = list(itertools.chain(node_image[nname].pinst,
2503
                                            node_image[nname].sinst))
2504
      diskless_instances.update(inst for inst in node_instances
2505
                                if instanceinfo[inst].disk_template == diskless)
2506
      disks = [(inst, disk)
2507
               for inst in node_instances
2508
               for disk in instanceinfo[inst].disks]
2509

    
2510
      if not disks:
2511
        # No need to collect data
2512
        continue
2513

    
2514
      node_disks[nname] = disks
2515

    
2516
      # Creating copies as SetDiskID below will modify the objects and that can
2517
      # lead to incorrect data returned from nodes
2518
      devonly = [dev.Copy() for (_, dev) in disks]
2519

    
2520
      for dev in devonly:
2521
        self.cfg.SetDiskID(dev, nname)
2522

    
2523
      node_disks_devonly[nname] = devonly
2524

    
2525
    assert len(node_disks) == len(node_disks_devonly)
2526

    
2527
    # Collect data from all nodes with disks
2528
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2529
                                                          node_disks_devonly)
2530

    
2531
    assert len(result) == len(node_disks)
2532

    
2533
    instdisk = {}
2534

    
2535
    for (nname, nres) in result.items():
2536
      disks = node_disks[nname]
2537

    
2538
      if nres.offline:
2539
        # No data from this node
2540
        data = len(disks) * [(False, "node offline")]
2541
      else:
2542
        msg = nres.fail_msg
2543
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
2544
                 "while getting disk information: %s", msg)
2545
        if msg:
2546
          # No data from this node
2547
          data = len(disks) * [(False, msg)]
2548
        else:
2549
          data = []
2550
          for idx, i in enumerate(nres.payload):
2551
            if isinstance(i, (tuple, list)) and len(i) == 2:
2552
              data.append(i)
2553
            else:
2554
              logging.warning("Invalid result from node %s, entry %d: %s",
2555
                              nname, idx, i)
2556
              data.append((False, "Invalid result from the remote node"))
2557

    
2558
      for ((inst, _), status) in zip(disks, data):
2559
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2560

    
2561
    # Add empty entries for diskless instances.
2562
    for inst in diskless_instances:
2563
      assert inst not in instdisk
2564
      instdisk[inst] = {}
2565

    
2566
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2567
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2568
                      compat.all(isinstance(s, (tuple, list)) and
2569
                                 len(s) == 2 for s in statuses)
2570
                      for inst, nnames in instdisk.items()
2571
                      for nname, statuses in nnames.items())
2572
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2573

    
2574
    return instdisk
2575

    
2576
  @staticmethod
2577
  def _SshNodeSelector(group_uuid, all_nodes):
2578
    """Create endless iterators for all potential SSH check hosts.
2579

2580
    """
2581
    nodes = [node for node in all_nodes
2582
             if (node.group != group_uuid and
2583
                 not node.offline)]
2584
    keyfunc = operator.attrgetter("group")
2585

    
2586
    return map(itertools.cycle,
2587
               [sorted(map(operator.attrgetter("name"), names))
2588
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2589
                                                  keyfunc)])
2590

    
2591
  @classmethod
2592
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2593
    """Choose which nodes should talk to which other nodes.
2594

2595
    We will make nodes contact all nodes in their group, and one node from
2596
    every other group.
2597

2598
    @warning: This algorithm has a known issue if one node group is much
2599
      smaller than others (e.g. just one node). In such a case all other
2600
      nodes will talk to the single node.
2601

2602
    """
2603
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2604
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2605

    
2606
    return (online_nodes,
2607
            dict((name, sorted([i.next() for i in sel]))
2608
                 for name in online_nodes))
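
  # Editorial toy version of the selection above, using plain data.
  @staticmethod
  def _ExampleSshTargets(online_nodes, other_group_members):
    """Illustrative helper only (not called by Ganeti).

    Every online node in this group is assigned one node from each *other*
    group; the per-group iterators cycle so the extra-group checks are spread
    over their members.

    @param online_nodes: list of node names in the current group
    @param other_group_members: list of name lists, one per other group

    """
    cycles = [itertools.cycle(sorted(names)) for names in other_group_members]
    return dict((name, sorted(c.next() for c in cycles))
                for name in online_nodes)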
2609

    
2610
  def BuildHooksEnv(self):
2611
    """Build hooks env.
2612

2613
    Cluster-Verify hooks just ran in the post phase and their failure makes
2614
    the output be logged in the verify output and the verification to fail.
2615

2616
    """
2617
    env = {
2618
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2619
      }
2620

    
2621
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2622
               for node in self.my_node_info.values())
2623

    
2624
    return env
2625

    
2626
  def BuildHooksNodes(self):
2627
    """Build hooks nodes.
2628

2629
    """
2630
    return ([], self.my_node_names)
2631

    
2632
  def Exec(self, feedback_fn):
2633
    """Verify integrity of the node group, performing various test on nodes.
2634

2635
    """
2636
    # This method has too many local variables. pylint: disable=R0914
2637
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2638

    
2639
    if not self.my_node_names:
2640
      # empty node group
2641
      feedback_fn("* Empty node group, skipping verification")
2642
      return True
2643

    
2644
    self.bad = False
2645
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2646
    verbose = self.op.verbose
2647
    self._feedback_fn = feedback_fn
2648

    
2649
    vg_name = self.cfg.GetVGName()
2650
    drbd_helper = self.cfg.GetDRBDHelper()
2651
    cluster = self.cfg.GetClusterInfo()
2652
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2653
    hypervisors = cluster.enabled_hypervisors
2654
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2655

    
2656
    i_non_redundant = [] # Non redundant instances
2657
    i_non_a_balanced = [] # Non auto-balanced instances
2658
    n_offline = 0 # Count of offline nodes
2659
    n_drained = 0 # Count of nodes being drained
2660
    node_vol_should = {}
2661

    
2662
    # FIXME: verify OS list
2663

    
2664
    # File verification
2665
    filemap = _ComputeAncillaryFiles(cluster, False)
2666

    
2667
    # do local checksums
2668
    master_node = self.master_node = self.cfg.GetMasterNode()
2669
    master_ip = self.cfg.GetMasterIP()
2670

    
2671
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2672

    
2673
    user_scripts = []
2674
    if self.cfg.GetUseExternalMipScript():
2675
      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2676

    
2677
    node_verify_param = {
2678
      constants.NV_FILELIST:
2679
        utils.UniqueSequence(filename
2680
                             for files in filemap
2681
                             for filename in files),
2682
      constants.NV_NODELIST:
2683
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2684
                                  self.all_node_info.values()),
2685
      constants.NV_HYPERVISOR: hypervisors,
2686
      constants.NV_HVPARAMS:
2687
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2688
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2689
                                 for node in node_data_list
2690
                                 if not node.offline],
2691
      constants.NV_INSTANCELIST: hypervisors,
2692
      constants.NV_VERSION: None,
2693
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2694
      constants.NV_NODESETUP: None,
2695
      constants.NV_TIME: None,
2696
      constants.NV_MASTERIP: (master_node, master_ip),
2697
      constants.NV_OSLIST: None,
2698
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2699
      constants.NV_USERSCRIPTS: user_scripts,
2700
      }
2701

    
2702
    if vg_name is not None:
2703
      node_verify_param[constants.NV_VGLIST] = None
2704
      node_verify_param[constants.NV_LVLIST] = vg_name
2705
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2706
      node_verify_param[constants.NV_DRBDLIST] = None
2707

    
2708
    if drbd_helper:
2709
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2710

    
2711
    # bridge checks
2712
    # FIXME: this needs to be changed per node-group, not cluster-wide
2713
    bridges = set()
2714
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2715
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2716
      bridges.add(default_nicpp[constants.NIC_LINK])
2717
    for instance in self.my_inst_info.values():
2718
      for nic in instance.nics:
2719
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2720
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2721
          bridges.add(full_nic[constants.NIC_LINK])
2722

    
2723
    if bridges:
2724
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2725

    
2726
    # Build our expected cluster state
2727
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2728
                                                 name=node.name,
2729
                                                 vm_capable=node.vm_capable))
2730
                      for node in node_data_list)
2731

    
2732
    # Gather OOB paths
2733
    oob_paths = []
2734
    for node in self.all_node_info.values():
2735
      path = _SupportsOob(self.cfg, node)
2736
      if path and path not in oob_paths:
2737
        oob_paths.append(path)
2738

    
2739
    if oob_paths:
2740
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2741

    
2742
    for instance in self.my_inst_names:
2743
      inst_config = self.my_inst_info[instance]
2744

    
2745
      for nname in inst_config.all_nodes:
2746
        if nname not in node_image:
2747
          gnode = self.NodeImage(name=nname)
2748
          gnode.ghost = (nname not in self.all_node_info)
2749
          node_image[nname] = gnode
2750

    
2751
      inst_config.MapLVsByNode(node_vol_should)
2752

    
2753
      pnode = inst_config.primary_node
2754
      node_image[pnode].pinst.append(instance)
2755

    
2756
      for snode in inst_config.secondary_nodes:
2757
        nimg = node_image[snode]
2758
        nimg.sinst.append(instance)
2759
        if pnode not in nimg.sbp:
2760
          nimg.sbp[pnode] = []
2761
        nimg.sbp[pnode].append(instance)
2762

    
2763
    # At this point, we have the in-memory data structures complete,
2764
    # except for the runtime information, which we'll gather next
2765

    
2766
    # Due to the way our RPC system works, exact response times cannot be
2767
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2768
    # time before and after executing the request, we can at least have a time
2769
    # window.
2770
    nvinfo_starttime = time.time()
2771
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2772
                                           node_verify_param,
2773
                                           self.cfg.GetClusterName())
2774
    nvinfo_endtime = time.time()
2775

    
2776
    if self.extra_lv_nodes and vg_name is not None:
2777
      extra_lv_nvinfo = \
2778
          self.rpc.call_node_verify(self.extra_lv_nodes,
2779
                                    {constants.NV_LVLIST: vg_name},
2780
                                    self.cfg.GetClusterName())
2781
    else:
2782
      extra_lv_nvinfo = {}
2783

    
2784
    all_drbd_map = self.cfg.ComputeDRBDMap()
2785

    
2786
    feedback_fn("* Gathering disk information (%s nodes)" %
2787
                len(self.my_node_names))
2788
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2789
                                     self.my_inst_info)
2790

    
2791
    feedback_fn("* Verifying configuration file consistency")
2792

    
2793
    # If not all nodes are being checked, we need to make sure the master node
2794
    # and a non-checked vm_capable node are in the list.
2795
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2796
    if absent_nodes:
2797
      vf_nvinfo = all_nvinfo.copy()
2798
      vf_node_info = list(self.my_node_info.values())
2799
      additional_nodes = []
2800
      if master_node not in self.my_node_info:
2801
        additional_nodes.append(master_node)
2802
        vf_node_info.append(self.all_node_info[master_node])
2803
      # Add the first vm_capable node we find which is not included
2804
      for node in absent_nodes:
2805
        nodeinfo = self.all_node_info[node]
2806
        if nodeinfo.vm_capable and not nodeinfo.offline:
2807
          additional_nodes.append(node)
2808
          vf_node_info.append(self.all_node_info[node])
2809
          break
2810
      key = constants.NV_FILELIST
2811
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2812
                                                 {key: node_verify_param[key]},
2813
                                                 self.cfg.GetClusterName()))
2814
    else:
2815
      vf_nvinfo = all_nvinfo
2816
      vf_node_info = self.my_node_info.values()
2817

    
2818
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2819

    
2820
    feedback_fn("* Verifying node status")
2821

    
2822
    refos_img = None
2823

    
2824
    for node_i in node_data_list:
2825
      node = node_i.name
2826
      nimg = node_image[node]
2827

    
2828
      if node_i.offline:
2829
        if verbose:
2830
          feedback_fn("* Skipping offline node %s" % (node,))
2831
        n_offline += 1
2832
        continue
2833

    
2834
      if node == master_node:
2835
        ntype = "master"
2836
      elif node_i.master_candidate:
2837
        ntype = "master candidate"
2838
      elif node_i.drained:
2839
        ntype = "drained"
2840
        n_drained += 1
2841
      else:
2842
        ntype = "regular"
2843
      if verbose:
2844
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2845

    
2846
      msg = all_nvinfo[node].fail_msg
2847
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2848
               msg)
2849
      if msg:
2850
        nimg.rpc_fail = True
2851
        continue
2852

    
2853
      nresult = all_nvinfo[node].payload
2854

    
2855
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2856
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2857
      self._VerifyNodeNetwork(node_i, nresult)
2858
      self._VerifyNodeUserScripts(node_i, nresult)
2859
      self._VerifyOob(node_i, nresult)
2860

    
2861
      if nimg.vm_capable:
2862
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2863
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2864
                             all_drbd_map)
2865

    
2866
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2867
        self._UpdateNodeInstances(node_i, nresult, nimg)
2868
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2869
        self._UpdateNodeOS(node_i, nresult, nimg)
2870

    
2871
        if not nimg.os_fail:
2872
          if refos_img is None:
2873
            refos_img = nimg
2874
          self._VerifyNodeOS(node_i, nimg, refos_img)
2875
        self._VerifyNodeBridges(node_i, nresult, bridges)
2876

    
2877
        # Check whether all running instancies are primary for the node. (This
2878
        # can no longer be done from _VerifyInstance below, since some of the
2879
        # wrong instances could be from other node groups.)
2880
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2881

    
2882
        for inst in non_primary_inst:
2883
          test = inst in self.all_inst_info
2884
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2885
                   "instance should not run on node %s", node_i.name)
2886
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2887
                   "node is running unknown instance %s", inst)
2888

    
2889
    for node, result in extra_lv_nvinfo.items():
2890
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2891
                              node_image[node], vg_name)
2892

    
2893
    feedback_fn("* Verifying instance status")
2894
    for instance in self.my_inst_names:
2895
      if verbose:
2896
        feedback_fn("* Verifying instance %s" % instance)
2897
      inst_config = self.my_inst_info[instance]
2898
      self._VerifyInstance(instance, inst_config, node_image,
2899
                           instdisk[instance])
2900
      inst_nodes_offline = []
2901

    
2902
      pnode = inst_config.primary_node
2903
      pnode_img = node_image[pnode]
2904
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2905
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
2906
               " primary node failed", instance)
2907

    
2908
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2909
               constants.CV_EINSTANCEBADNODE, instance,
2910
               "instance is marked as running and lives on offline node %s",
2911
               inst_config.primary_node)
2912

    
2913
      # If the instance is non-redundant we cannot survive losing its primary
2914
      # node, so we are not N+1 compliant. On the other hand we have no disk
2915
      # templates with more than one secondary so that situation is not well
2916
      # supported either.
2917
      # FIXME: does not support file-backed instances
2918
      if not inst_config.secondary_nodes:
2919
        i_non_redundant.append(instance)
2920

    
2921
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
2922
               constants.CV_EINSTANCELAYOUT,
2923
               instance, "instance has multiple secondary nodes: %s",
2924
               utils.CommaJoin(inst_config.secondary_nodes),
2925
               code=self.ETYPE_WARNING)
2926

    
2927
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2928
        pnode = inst_config.primary_node
2929
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2930
        instance_groups = {}
2931

    
2932
        for node in instance_nodes:
2933
          instance_groups.setdefault(self.all_node_info[node].group,
2934
                                     []).append(node)
2935

    
2936
        pretty_list = [
2937
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2938
          # Sort so that we always list the primary node first.
2939
          for group, nodes in sorted(instance_groups.items(),
2940
                                     key=lambda (_, nodes): pnode in nodes,
2941
                                     reverse=True)]
2942

    
2943
        self._ErrorIf(len(instance_groups) > 1,
2944
                      constants.CV_EINSTANCESPLITGROUPS,
2945
                      instance, "instance has primary and secondary nodes in"
2946
                      " different groups: %s", utils.CommaJoin(pretty_list),
2947
                      code=self.ETYPE_WARNING)
2948

    
2949
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2950
        i_non_a_balanced.append(instance)
2951

    
2952
      for snode in inst_config.secondary_nodes:
2953
        s_img = node_image[snode]
2954
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2955
                 snode, "instance %s, connection to secondary node failed",
2956
                 instance)
2957

    
2958
        if s_img.offline:
2959
          inst_nodes_offline.append(snode)
2960

    
2961
      # warn that the instance lives on offline nodes
2962
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2963
               "instance has offline secondary node(s) %s",
2964
               utils.CommaJoin(inst_nodes_offline))
2965
      # ... or ghost/non-vm_capable nodes
2966
      for node in inst_config.all_nodes:
2967
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2968
                 instance, "instance lives on ghost node %s", node)
2969
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2970
                 instance, "instance lives on non-vm_capable node %s", node)
2971

    
2972
    feedback_fn("* Verifying orphan volumes")
2973
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2974

    
2975
    # We will get spurious "unknown volume" warnings if any node of this group
2976
    # is secondary for an instance whose primary is in another group. To avoid
2977
    # them, we find these instances and add their volumes to node_vol_should.
2978
    for inst in self.all_inst_info.values():
2979
      for secondary in inst.secondary_nodes:
2980
        if (secondary in self.my_node_info
2981
            and inst.name not in self.my_inst_info):
2982
          inst.MapLVsByNode(node_vol_should)
2983
          break
2984

    
2985
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2986

    
2987
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2988
      feedback_fn("* Verifying N+1 Memory redundancy")
2989
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2990

    
2991
    feedback_fn("* Other Notes")
2992
    if i_non_redundant:
2993
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2994
                  % len(i_non_redundant))
2995

    
2996
    if i_non_a_balanced:
2997
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2998
                  % len(i_non_a_balanced))
2999

    
3000
    if n_offline:
3001
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3002

    
3003
    if n_drained:
3004
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3005

    
3006
    return not self.bad
3007

    
3008
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3009
    """Analyze the post-hooks' result
3010

3011
    This method analyses the hook result, handles it, and sends some
3012
    nicely-formatted feedback back to the user.
3013

3014
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
3015
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3016
    @param hooks_results: the results of the multi-node hooks rpc call
3017
    @param feedback_fn: function used send feedback back to the caller
3018
    @param lu_result: previous Exec result
3019
    @return: the new Exec result, based on the previous result
3020
        and hook results
3021

3022
    """
3023
    # We only really run POST phase hooks, only for non-empty groups,
3024
    # and are only interested in their results
3025
    if not self.my_node_names:
3026
      # empty node group
3027
      pass
3028
    elif phase == constants.HOOKS_PHASE_POST:
3029
      # Used to change hooks' output to proper indentation
3030
      feedback_fn("* Hooks Results")
3031
      assert hooks_results, "invalid result from hooks"
3032

    
3033
      for node_name in hooks_results:
3034
        res = hooks_results[node_name]
3035
        msg = res.fail_msg
3036
        test = msg and not res.offline
3037
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3038
                      "Communication failure in hooks execution: %s", msg)
3039
        if res.offline or msg:
3040
          # No need to investigate payload if node is offline or gave
3041
          # an error.
3042
          continue
3043
        for script, hkr, output in res.payload:
3044
          test = hkr == constants.HKR_FAIL
3045
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3046
                        "Script %s failed, output:", script)
3047
          if test:
3048
            output = self._HOOKS_INDENT_RE.sub("      ", output)
3049
            feedback_fn("%s" % output)
3050
            lu_result = False
3051

    
3052
    return lu_result
3053

    
3054

    
3055
class LUClusterVerifyDisks(NoHooksLU):
3056
  """Verifies the cluster disks status.
3057

3058
  """
3059
  REQ_BGL = False
3060

    
3061
  def ExpandNames(self):
3062
    self.share_locks = _ShareAll()
3063
    self.needed_locks = {
3064
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
3065
      }
3066

    
3067
  def Exec(self, feedback_fn):
3068
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3069

    
3070
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3071
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3072
                           for group in group_names])
3073

    
3074

    
3075
class LUGroupVerifyDisks(NoHooksLU):
3076
  """Verifies the status of all disks in a node group.
3077

3078
  """
3079
  REQ_BGL = False
3080

    
3081
  def ExpandNames(self):
3082
    # Raises errors.OpPrereqError on its own if group can't be found
3083
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3084

    
3085
    self.share_locks = _ShareAll()
3086
    self.needed_locks = {
3087
      locking.LEVEL_INSTANCE: [],
3088
      locking.LEVEL_NODEGROUP: [],
3089
      locking.LEVEL_NODE: [],
3090
      }
3091

    
3092
  def DeclareLocks(self, level):
3093
    if level == locking.LEVEL_INSTANCE:
3094
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3095

    
3096
      # Lock instances optimistically, needs verification once node and group
3097
      # locks have been acquired
3098
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3099
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3100

    
3101
    elif level == locking.LEVEL_NODEGROUP:
3102
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3103

    
3104
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3105
        set([self.group_uuid] +
3106
            # Lock all groups used by instances optimistically; this requires
3107
            # going via the node before it's locked, requiring verification
3108
            # later on
3109
            [group_uuid
3110
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3111
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3112

    
3113
    elif level == locking.LEVEL_NODE:
3114
      # This will only lock the nodes in the group to be verified which contain
3115
      # actual instances
3116
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3117
      self._LockInstancesNodes()
3118

    
3119
      # Lock all nodes in group to be verified
3120
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3121
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3122
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3123

    
3124
  def CheckPrereq(self):
3125
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3126
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3127
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3128

    
3129
    assert self.group_uuid in owned_groups
3130

    
3131
    # Check if locked instances are still correct
3132
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3133

    
3134
    # Get instance information
3135
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3136

    
3137
    # Check if node groups for locked instances are still correct
3138
    for (instance_name, inst) in self.instances.items():
3139
      assert owned_nodes.issuperset(inst.all_nodes), \
3140
        "Instance %s's nodes changed while we kept the lock" % instance_name
3141

    
3142
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3143
                                             owned_groups)
3144

    
3145
      assert self.group_uuid in inst_groups, \
3146
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3147

    
3148
  def Exec(self, feedback_fn):
3149
    """Verify integrity of cluster disks.
3150

3151
    @rtype: tuple of three items
3152
    @return: a tuple of (dict of node-to-node_error, list of instances
3153
        which need activate-disks, dict of instance: (node, volume) for
3154
        missing volumes
3155

3156
    """
3157
    res_nodes = {}
3158
    res_instances = set()
3159
    res_missing = {}
3160

    
3161
    nv_dict = _MapInstanceDisksToNodes([inst
3162
                                        for inst in self.instances.values()
3163
                                        if inst.admin_up])
3164

    
3165
    if nv_dict:
3166
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3167
                             set(self.cfg.GetVmCapableNodeList()))
3168

    
3169
      node_lvs = self.rpc.call_lv_list(nodes, [])
3170

    
3171
      for (node, node_res) in node_lvs.items():
3172
        if node_res.offline:
3173
          continue
3174

    
3175
        msg = node_res.fail_msg
3176
        if msg:
3177
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3178
          res_nodes[node] = msg
3179
          continue
3180

    
3181
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3182
          inst = nv_dict.pop((node, lv_name), None)
3183
          if not (lv_online or inst is None):
3184
            res_instances.add(inst)
3185

    
3186
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3187
      # better
3188
      for key, inst in nv_dict.iteritems():
3189
        res_missing.setdefault(inst, []).append(list(key))
3190

    
3191
    return (res_nodes, list(res_instances), res_missing)
3192

    
3193

    
3194
class LUClusterRepairDiskSizes(NoHooksLU):
3195
  """Verifies the cluster disks sizes.
3196

3197
  """
3198
  REQ_BGL = False
3199

    
3200
  def ExpandNames(self):
3201
    if self.op.instances:
3202
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3203
      self.needed_locks = {
3204
        locking.LEVEL_NODE: [],
3205
        locking.LEVEL_INSTANCE: self.wanted_names,
3206
        }
3207
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3208
    else:
3209
      self.wanted_names = None
3210
      self.needed_locks = {
3211
        locking.LEVEL_NODE: locking.ALL_SET,
3212
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3213
        }
3214
    self.share_locks = _ShareAll()
3215

    
3216
  def DeclareLocks(self, level):
3217
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3218
      self._LockInstancesNodes(primary_only=True)
3219

    
3220
  def CheckPrereq(self):
3221
    """Check prerequisites.
3222

3223
    This only checks the optional instance list against the existing names.
3224

3225
    """
3226
    if self.wanted_names is None:
3227
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3228

    
3229
    self.wanted_instances = \
3230
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3231

    
3232
  def _EnsureChildSizes(self, disk):
3233
    """Ensure children of the disk have the needed disk size.
3234

3235
    This is valid mainly for DRBD8 and fixes an issue where the
3236
    children have smaller disk size.
3237

3238
    @param disk: an L{ganeti.objects.Disk} object
3239

3240
    """
3241
    if disk.dev_type == constants.LD_DRBD8:
3242
      assert disk.children, "Empty children for DRBD8?"
3243
      fchild = disk.children[0]
3244
      mismatch = fchild.size < disk.size
3245
      if mismatch:
3246
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3247
                     fchild.size, disk.size)
3248
        fchild.size = disk.size
3249

    
3250
      # and we recurse on this child only, not on the metadev
3251
      return self._EnsureChildSizes(fchild) or mismatch
3252
    else:
3253
      return False
3254

    
3255
  def Exec(self, feedback_fn):
3256
    """Verify the size of cluster disks.
3257

3258
    """
3259
    # TODO: check child disks too
3260
    # TODO: check differences in size between primary/secondary nodes
3261
    per_node_disks = {}
3262
    for instance in self.wanted_instances:
3263
      pnode = instance.primary_node
3264
      if pnode not in per_node_disks:
3265
        per_node_disks[pnode] = []
3266
      for idx, disk in enumerate(instance.disks):
3267
        per_node_disks[pnode].append((instance, idx, disk))
3268

    
3269
    changed = []
3270
    for node, dskl in per_node_disks.items():
3271
      newl = [v[2].Copy() for v in dskl]
3272
      for dsk in newl:
3273
        self.cfg.SetDiskID(dsk, node)
3274
      result = self.rpc.call_blockdev_getsize(node, newl)
3275
      if result.fail_msg:
3276
        self.LogWarning("Failure in blockdev_getsize call to node"
3277
                        " %s, ignoring", node)
3278
        continue
3279
      if len(result.payload) != len(dskl):
3280
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3281
                        " result.payload=%s", node, len(dskl), result.payload)
3282
        self.LogWarning("Invalid result from node %s, ignoring node results",
3283
                        node)
3284
        continue
3285
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3286
        if size is None:
3287
          self.LogWarning("Disk %d of instance %s did not return size"
3288
                          " information, ignoring", idx, instance.name)
3289
          continue
3290
        if not isinstance(size, (int, long)):
3291
          self.LogWarning("Disk %d of instance %s did not return valid"
3292
                          " size information, ignoring", idx, instance.name)
3293
          continue
3294
        size = size >> 20
3295
        if size != disk.size:
3296
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3297
                       " correcting: recorded %d, actual %d", idx,
3298
                       instance.name, disk.size, size)
3299
          disk.size = size
3300
          self.cfg.Update(instance, feedback_fn)
3301
          changed.append((instance.name, idx, size))
3302
        if self._EnsureChildSizes(disk):
3303
          self.cfg.Update(instance, feedback_fn)
3304
          changed.append((instance.name, idx, disk.size))
3305
    return changed
3306

    
3307

    
3308
class LUClusterRename(LogicalUnit):
3309
  """Rename the cluster.
3310

3311
  """
3312
  HPATH = "cluster-rename"
3313
  HTYPE = constants.HTYPE_CLUSTER
3314

    
3315
  def BuildHooksEnv(self):
3316
    """Build hooks env.
3317

3318
    """
3319
    return {
3320
      "OP_TARGET": self.cfg.GetClusterName(),
3321
      "NEW_NAME": self.op.name,
3322
      }
3323

    
3324
  def BuildHooksNodes(self):
3325
    """Build hooks nodes.
3326

3327
    """
3328
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3329

    
3330
  def CheckPrereq(self):
3331
    """Verify that the passed name is a valid one.
3332

3333
    """
3334
    hostname = netutils.GetHostname(name=self.op.name,
3335
                                    family=self.cfg.GetPrimaryIPFamily())
3336

    
3337
    new_name = hostname.name
3338
    self.ip = new_ip = hostname.ip
3339
    old_name = self.cfg.GetClusterName()
3340
    old_ip = self.cfg.GetMasterIP()
3341
    if new_name == old_name and new_ip == old_ip:
3342
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3343
                                 " cluster has changed",
3344
                                 errors.ECODE_INVAL)
3345
    if new_ip != old_ip:
3346
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3347
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3348
                                   " reachable on the network" %
3349
                                   new_ip, errors.ECODE_NOTUNIQUE)
3350

    
3351
    self.op.name = new_name
3352

    
3353
  def Exec(self, feedback_fn):
3354
    """Rename the cluster.
3355

3356
    """
3357
    clustername = self.op.name
3358
    new_ip = self.ip
3359

    
3360
    # shutdown the master IP
3361
    master_params = self.cfg.GetMasterNetworkParameters()
3362
    ems = self.cfg.GetUseExternalMipScript()
3363
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3364
                                                     master_params, ems)
3365
    result.Raise("Could not disable the master role")
3366

    
3367
    try:
3368
      cluster = self.cfg.GetClusterInfo()
3369
      cluster.cluster_name = clustername
3370
      cluster.master_ip = new_ip
3371
      self.cfg.Update(cluster, feedback_fn)
3372

    
3373
      # update the known hosts file
3374
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3375
      node_list = self.cfg.GetOnlineNodeList()
3376
      try:
3377
        node_list.remove(master_params.name)
3378
      except ValueError:
3379
        pass
3380
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3381
    finally:
3382
      master_params.ip = new_ip
3383
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3384
                                                     master_params, ems)
3385
      msg = result.fail_msg
3386
      if msg:
3387
        self.LogWarning("Could not re-enable the master role on"
3388
                        " the master, please restart manually: %s", msg)
3389

    
3390
    return clustername
3391

    
3392

    
3393
def _ValidateNetmask(cfg, netmask):
3394
  """Checks if a netmask is valid.
3395

3396
  @type cfg: L{config.ConfigWriter}
3397
  @param cfg: The cluster configuration
3398
  @type netmask: int
3399
  @param netmask: the netmask to be verified
3400
  @raise errors.OpPrereqError: if the validation fails
3401

3402
  """
3403
  ip_family = cfg.GetPrimaryIPFamily()
3404
  try:
3405
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3406
  except errors.ProgrammerError:
3407
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
3408
                               ip_family)
3409
  if not ipcls.ValidateNetmask(netmask):
3410
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3411
                                (netmask))
3412

    
3413

    
3414
class LUClusterSetParams(LogicalUnit):
3415
  """Change the parameters of the cluster.
3416

3417
  """
3418
  HPATH = "cluster-modify"
3419
  HTYPE = constants.HTYPE_CLUSTER
3420
  REQ_BGL = False
3421

    
3422
  def CheckArguments(self):
3423
    """Check parameters
3424

3425
    """
3426
    if self.op.uid_pool:
3427
      uidpool.CheckUidPool(self.op.uid_pool)
3428

    
3429
    if self.op.add_uids:
3430
      uidpool.CheckUidPool(self.op.add_uids)
3431

    
3432
    if self.op.remove_uids:
3433
      uidpool.CheckUidPool(self.op.remove_uids)
3434

    
3435
    if self.op.master_netmask is not None:
3436
      _ValidateNetmask(self.cfg, self.op.master_netmask)
3437

    
3438
  def ExpandNames(self):
3439
    # FIXME: in the future maybe other cluster params won't require checking on
3440
    # all nodes to be modified.
3441
    self.needed_locks = {
3442
      locking.LEVEL_NODE: locking.ALL_SET,
3443
    }
3444
    self.share_locks[locking.LEVEL_NODE] = 1
3445

    
3446
  def BuildHooksEnv(self):
3447
    """Build hooks env.
3448

3449
    """
3450
    return {
3451
      "OP_TARGET": self.cfg.GetClusterName(),
3452
      "NEW_VG_NAME": self.op.vg_name,
3453
      }
3454

    
3455
  def BuildHooksNodes(self):
3456
    """Build hooks nodes.
3457

3458
    """
3459
    mn = self.cfg.GetMasterNode()
3460
    return ([mn], [mn])
3461

    
3462
  def CheckPrereq(self):
3463
    """Check prerequisites.
3464

3465
    This checks whether the given params don't conflict and
3466
    if the given volume group is valid.
3467

3468
    """
3469
    if self.op.vg_name is not None and not self.op.vg_name:
3470
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3471
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3472
                                   " instances exist", errors.ECODE_INVAL)
3473

    
3474
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3475
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3476
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3477
                                   " drbd-based instances exist",
3478
                                   errors.ECODE_INVAL)
3479

    
3480
    node_list = self.owned_locks(locking.LEVEL_NODE)
3481

    
3482
    # if vg_name not None, checks given volume group on all nodes
3483
    if self.op.vg_name:
3484
      vglist = self.rpc.call_vg_list(node_list)
3485
      for node in node_list:
3486
        msg = vglist[node].fail_msg
3487
        if msg:
3488
          # ignoring down node
3489
          self.LogWarning("Error while gathering data on node %s"
3490
                          " (ignoring node): %s", node, msg)
3491
          continue
3492
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3493
                                              self.op.vg_name,
3494
                                              constants.MIN_VG_SIZE)
3495
        if vgstatus:
3496
          raise errors.OpPrereqError("Error on node '%s': %s" %
3497
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3498

    
3499
    if self.op.drbd_helper:
3500
      # checks given drbd helper on all nodes
3501
      helpers = self.rpc.call_drbd_helper(node_list)
3502
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3503
        if ninfo.offline:
3504
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3505
          continue
3506
        msg = helpers[node].fail_msg
3507
        if msg:
3508
          raise errors.OpPrereqError("Error checking drbd helper on node"
3509
                                     " '%s': %s" % (node, msg),
3510
                                     errors.ECODE_ENVIRON)
3511
        node_helper = helpers[node].payload
3512
        if node_helper != self.op.drbd_helper:
3513
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3514
                                     (node, node_helper), errors.ECODE_ENVIRON)
3515

    
3516
    self.cluster = cluster = self.cfg.GetClusterInfo()
3517
    # validate params changes
3518
    if self.op.beparams:
3519
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3520
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3521

    
3522
    if self.op.ndparams:
3523
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3524
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3525

    
3526
      # TODO: we need a more general way to handle resetting
3527
      # cluster-level parameters to default values
3528
      if self.new_ndparams["oob_program"] == "":
3529
        self.new_ndparams["oob_program"] = \
3530
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3531

    
3532
    if self.op.nicparams:
3533
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3534
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3535
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3536
      nic_errors = []
3537

    
3538
      # check all instances for consistency
3539
      for instance in self.cfg.GetAllInstancesInfo().values():
3540
        for nic_idx, nic in enumerate(instance.nics):
3541
          params_copy = copy.deepcopy(nic.nicparams)
3542
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3543

    
3544
          # check parameter syntax
3545
          try:
3546
            objects.NIC.CheckParameterSyntax(params_filled)
3547
          except errors.ConfigurationError, err:
3548
            nic_errors.append("Instance %s, nic/%d: %s" %
3549
                              (instance.name, nic_idx, err))
3550

    
3551
          # if we're moving instances to routed, check that they have an ip
3552
          target_mode = params_filled[constants.NIC_MODE]
3553
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3554
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3555
                              " address" % (instance.name, nic_idx))
3556
      if nic_errors:
3557
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3558
                                   "\n".join(nic_errors))
3559

    
3560
    # hypervisor list/parameters
3561
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3562
    if self.op.hvparams:
3563
      for hv_name, hv_dict in self.op.hvparams.items():
3564
        if hv_name not in self.new_hvparams:
3565
          self.new_hvparams[hv_name] = hv_dict
3566
        else:
3567
          self.new_hvparams[hv_name].update(hv_dict)
3568

    
3569
    # os hypervisor parameters
3570
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3571
    if self.op.os_hvp:
3572
      for os_name, hvs in self.op.os_hvp.items():
3573
        if os_name not in self.new_os_hvp:
3574
          self.new_os_hvp[os_name] = hvs
3575
        else:
3576
          for hv_name, hv_dict in hvs.items():
3577
            if hv_name not in self.new_os_hvp[os_name]:
3578
              self.new_os_hvp[os_name][hv_name] = hv_dict
3579
            else:
3580
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3581

    
3582
    # os parameters
3583
    self.new_osp = objects.FillDict(cluster.osparams, {})
3584
    if self.op.osparams:
3585
      for os_name, osp in self.op.osparams.items():
3586
        if os_name not in self.new_osp:
3587
          self.new_osp[os_name] = {}
3588

    
3589
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3590
                                                  use_none=True)
3591

    
3592
        if not self.new_osp[os_name]:
3593
          # we removed all parameters
3594
          del self.new_osp[os_name]
3595
        else:
3596
          # check the parameter validity (remote check)
3597
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3598
                         os_name, self.new_osp[os_name])
3599

    
3600
    # changes to the hypervisor list
3601
    if self.op.enabled_hypervisors is not None:
3602
      self.hv_list = self.op.enabled_hypervisors
3603
      for hv in self.hv_list:
3604
        # if the hypervisor doesn't already exist in the cluster
3605
        # hvparams, we initialize it to empty, and then (in both
3606
        # cases) we make sure to fill the defaults, as we might not
3607
        # have a complete defaults list if the hypervisor wasn't
3608
        # enabled before
3609
        if hv not in new_hvp:
3610
          new_hvp[hv] = {}
3611
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3612
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3613
    else:
3614
      self.hv_list = cluster.enabled_hypervisors
3615

    
3616
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3617
      # either the enabled list has changed, or the parameters have, validate
3618
      for hv_name, hv_params in self.new_hvparams.items():
3619
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3620
            (self.op.enabled_hypervisors and
3621
             hv_name in self.op.enabled_hypervisors)):
3622
          # either this is a new hypervisor, or its parameters have changed
3623
          hv_class = hypervisor.GetHypervisor(hv_name)
3624
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3625
          hv_class.CheckParameterSyntax(hv_params)
3626
          _CheckHVParams(self, node_list, hv_name, hv_params)
3627

    
3628
    if self.op.os_hvp:
3629
      # no need to check any newly-enabled hypervisors, since the
3630
      # defaults have already been checked in the above code-block
3631
      for os_name, os_hvp in self.new_os_hvp.items():
3632
        for hv_name, hv_params in os_hvp.items():
3633
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3634
          # we need to fill in the new os_hvp on top of the actual hv_p
3635
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3636
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3637
          hv_class = hypervisor.GetHypervisor(hv_name)
3638
          hv_class.CheckParameterSyntax(new_osp)
3639
          _CheckHVParams(self, node_list, hv_name, new_osp)
3640

    
3641
    if self.op.default_iallocator:
3642
      alloc_script = utils.FindFile(self.op.default_iallocator,
3643
                                    constants.IALLOCATOR_SEARCH_PATH,
3644
                                    os.path.isfile)
3645
      if alloc_script is None:
3646
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3647
                                   " specified" % self.op.default_iallocator,
3648
                                   errors.ECODE_INVAL)
3649

    
3650
  def Exec(self, feedback_fn):
3651
    """Change the parameters of the cluster.
3652

3653
    """
3654
    if self.op.vg_name is not None:
3655
      new_volume = self.op.vg_name
3656
      if not new_volume:
3657
        new_volume = None
3658
      if new_volume != self.cfg.GetVGName():
3659
        self.cfg.SetVGName(new_volume)
3660
      else:
3661
        feedback_fn("Cluster LVM configuration already in desired"
3662
                    " state, not changing")
3663
    if self.op.drbd_helper is not None:
3664
      new_helper = self.op.drbd_helper
3665
      if not new_helper:
3666
        new_helper = None
3667
      if new_helper != self.cfg.GetDRBDHelper():
3668
        self.cfg.SetDRBDHelper(new_helper)
3669
      else:
3670
        feedback_fn("Cluster DRBD helper already in desired state,"
3671
                    " not changing")
3672
    if self.op.hvparams:
3673
      self.cluster.hvparams = self.new_hvparams
3674
    if self.op.os_hvp:
3675
      self.cluster.os_hvp = self.new_os_hvp
3676
    if self.op.enabled_hypervisors is not None:
3677
      self.cluster.hvparams = self.new_hvparams
3678
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3679
    if self.op.beparams:
3680
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3681
    if self.op.nicparams:
3682
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3683
    if self.op.osparams:
3684
      self.cluster.osparams = self.new_osp
3685
    if self.op.ndparams:
3686
      self.cluster.ndparams = self.new_ndparams
3687

    
3688
    if self.op.candidate_pool_size is not None:
3689
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3690
      # we need to update the pool size here, otherwise the save will fail
3691
      _AdjustCandidatePool(self, [])
3692

    
3693
    if self.op.maintain_node_health is not None:
3694
      self.cluster.maintain_node_health = self.op.maintain_node_health
3695

    
3696
    if self.op.prealloc_wipe_disks is not None:
3697
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3698

    
3699
    if self.op.add_uids is not None:
3700
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3701

    
3702
    if self.op.remove_uids is not None:
3703
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3704

    
3705
    if self.op.uid_pool is not None:
3706
      self.cluster.uid_pool = self.op.uid_pool
3707

    
3708
    if self.op.default_iallocator is not None:
3709
      self.cluster.default_iallocator = self.op.default_iallocator
3710

    
3711
    if self.op.reserved_lvs is not None:
3712
      self.cluster.reserved_lvs = self.op.reserved_lvs
3713

    
3714
    if self.op.use_external_mip_script is not None:
3715
      self.cluster.use_external_mip_script = self.op.use_external_mip_script
3716

    
3717
    def helper_os(aname, mods, desc):
3718
      desc += " OS list"
3719
      lst = getattr(self.cluster, aname)
3720
      for key, val in mods:
3721
        if key == constants.DDM_ADD:
3722
          if val in lst:
3723
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3724
          else:
3725
            lst.append(val)
3726
        elif key == constants.DDM_REMOVE:
3727
          if val in lst:
3728
            lst.remove(val)
3729
          else:
3730
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3731
        else:
3732
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3733

    
3734
    if self.op.hidden_os:
3735
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3736

    
3737
    if self.op.blacklisted_os:
3738
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3739

    
3740
    if self.op.master_netdev:
3741
      master_params = self.cfg.GetMasterNetworkParameters()
3742
      ems = self.cfg.GetUseExternalMipScript()
3743
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3744
                  self.cluster.master_netdev)
3745
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3746
                                                       master_params, ems)
3747
      result.Raise("Could not disable the master ip")
3748
      feedback_fn("Changing master_netdev from %s to %s" %
3749
                  (master_params.netdev, self.op.master_netdev))
3750
      self.cluster.master_netdev = self.op.master_netdev
3751

    
3752
    if self.op.master_netmask:
3753
      master_params = self.cfg.GetMasterNetworkParameters()
3754
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3755
      result = self.rpc.call_node_change_master_netmask(master_params.name,
3756
                                                        master_params.netmask,
3757
                                                        self.op.master_netmask,
3758
                                                        master_params.ip,
3759
                                                        master_params.netdev)
3760
      if result.fail_msg:
3761
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
3762
        feedback_fn(msg)
3763

    
3764
      self.cluster.master_netmask = self.op.master_netmask
3765

    
3766
    self.cfg.Update(self.cluster, feedback_fn)
3767

    
3768
    if self.op.master_netdev:
3769
      master_params = self.cfg.GetMasterNetworkParameters()
3770
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3771
                  self.op.master_netdev)
3772
      ems = self.cfg.GetUseExternalMipScript()
3773
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3774
                                                     master_params, ems)
3775
      if result.fail_msg:
3776
        self.LogWarning("Could not re-enable the master ip on"
3777
                        " the master, please restart manually: %s",
3778
                        result.fail_msg)
3779

    
3780

    
3781
def _UploadHelper(lu, nodes, fname):
3782
  """Helper for uploading a file and showing warnings.
3783

3784
  """
3785
  if os.path.exists(fname):
3786
    result = lu.rpc.call_upload_file(nodes, fname)
3787
    for to_node, to_result in result.items():
3788
      msg = to_result.fail_msg
3789
      if msg:
3790
        msg = ("Copy of file %s to node %s failed: %s" %
3791
               (fname, to_node, msg))
3792
        lu.proc.LogWarning(msg)
3793

    
3794

    
3795
def _ComputeAncillaryFiles(cluster, redist):
3796
  """Compute files external to Ganeti which need to be consistent.
3797

3798
  @type redist: boolean
3799
  @param redist: Whether to include files which need to be redistributed
3800

3801
  """
3802
  # Compute files for all nodes
3803
  files_all = set([
3804
    constants.SSH_KNOWN_HOSTS_FILE,
3805
    constants.CONFD_HMAC_KEY,
3806
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3807
    constants.SPICE_CERT_FILE,
3808
    constants.SPICE_CACERT_FILE,
3809
    constants.RAPI_USERS_FILE,
3810
    ])
3811

    
3812
  if not redist:
3813
    files_all.update(constants.ALL_CERT_FILES)
3814
    files_all.update(ssconf.SimpleStore().GetFileList())
3815
  else:
3816
    # we need to ship at least the RAPI certificate
3817
    files_all.add(constants.RAPI_CERT_FILE)
3818

    
3819
  if cluster.modify_etc_hosts:
3820
    files_all.add(constants.ETC_HOSTS)
3821

    
3822
  # Files which are optional, these must:
3823
  # - be present in one other category as well
3824
  # - either exist or not exist on all nodes of that category (mc, vm all)
3825
  files_opt = set([
3826
    constants.RAPI_USERS_FILE,
3827
    ])
3828

    
3829
  # Files which should only be on master candidates
3830
  files_mc = set()
3831

    
3832
  if not redist:
3833
    files_mc.add(constants.CLUSTER_CONF_FILE)
3834

    
3835
    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3836
    # replication
3837
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
3838

    
3839
  # Files which should only be on VM-capable nodes
3840
  files_vm = set(filename
3841
    for hv_name in cluster.enabled_hypervisors
3842
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3843

    
3844
  files_opt |= set(filename
3845
    for hv_name in cluster.enabled_hypervisors
3846
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3847

    
3848
  # Filenames in each category must be unique
3849
  all_files_set = files_all | files_mc | files_vm
3850
  assert (len(all_files_set) ==
3851
          sum(map(len, [files_all, files_mc, files_vm]))), \
3852
         "Found file listed in more than one file list"
3853

    
3854
  # Optional files must be present in one other category
3855
  assert all_files_set.issuperset(files_opt), \
3856
         "Optional file not in a different required list"
3857

    
3858
  return (files_all, files_opt, files_mc, files_vm)
3859

    
3860

    
3861
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3862
  """Distribute additional files which are part of the cluster configuration.
3863

3864
  ConfigWriter takes care of distributing the config and ssconf files, but
3865
  there are more files which should be distributed to all nodes. This function
3866
  makes sure those are copied.
3867

3868
  @param lu: calling logical unit
3869
  @param additional_nodes: list of nodes not in the config to distribute to
3870
  @type additional_vm: boolean
3871
  @param additional_vm: whether the additional nodes are vm-capable or not
3872

3873
  """
3874
  # Gather target nodes
3875
  cluster = lu.cfg.GetClusterInfo()
3876
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3877

    
3878
  online_nodes = lu.cfg.GetOnlineNodeList()
3879
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3880

    
3881
  if additional_nodes is not None:
3882
    online_nodes.extend(additional_nodes)
3883
    if additional_vm:
3884
      vm_nodes.extend(additional_nodes)
3885

    
3886
  # Never distribute to master node
3887
  for nodelist in [online_nodes, vm_nodes]:
3888
    if master_info.name in nodelist:
3889
      nodelist.remove(master_info.name)
3890

    
3891
  # Gather file lists
3892
  (files_all, _, files_mc, files_vm) = \
3893
    _ComputeAncillaryFiles(cluster, True)
3894

    
3895
  # Never re-distribute configuration file from here
3896
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3897
              constants.CLUSTER_CONF_FILE in files_vm)
3898
  assert not files_mc, "Master candidates not handled in this function"
3899

    
3900
  filemap = [
3901
    (online_nodes, files_all),
3902
    (vm_nodes, files_vm),
3903
    ]
3904

    
3905
  # Upload the files
3906
  for (node_list, files) in filemap:
3907
    for fname in files:
3908
      _UploadHelper(lu, node_list, fname)
3909

    
3910

    
3911
class LUClusterRedistConf(NoHooksLU):
3912
  """Force the redistribution of cluster configuration.
3913

3914
  This is a very simple LU.
3915

3916
  """
3917
  REQ_BGL = False
3918

    
3919
  def ExpandNames(self):
3920
    self.needed_locks = {
3921
      locking.LEVEL_NODE: locking.ALL_SET,
3922
    }
3923
    self.share_locks[locking.LEVEL_NODE] = 1
3924

    
3925
  def Exec(self, feedback_fn):
3926
    """Redistribute the configuration.
3927

3928
    """
3929
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3930
    _RedistributeAncillaryFiles(self)
3931

    
3932

    
3933
class LUClusterActivateMasterIp(NoHooksLU):
3934
  """Activate the master IP on the master node.
3935

3936
  """
3937
  def Exec(self, feedback_fn):
3938
    """Activate the master IP.
3939

3940
    """
3941
    master_params = self.cfg.GetMasterNetworkParameters()
3942
    ems = self.cfg.GetUseExternalMipScript()
3943
    self.rpc.call_node_activate_master_ip(master_params.name,
3944
                                          master_params, ems)
3945

    
3946

    
3947
class LUClusterDeactivateMasterIp(NoHooksLU):
3948
  """Deactivate the master IP on the master node.
3949

3950
  """
3951
  def Exec(self, feedback_fn):
3952
    """Deactivate the master IP.
3953

3954
    """
3955
    master_params = self.cfg.GetMasterNetworkParameters()
3956
    ems = self.cfg.GetUseExternalMipScript()
3957
    self.rpc.call_node_deactivate_master_ip(master_params.name, master_params,
3958
                                            ems)
3959

    
3960

    
3961
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3962
  """Sleep and poll for an instance's disk to sync.
3963

3964
  """
3965
  if not instance.disks or disks is not None and not disks:
3966
    return True
3967

    
3968
  disks = _ExpandCheckDisks(instance, disks)
3969

    
3970
  if not oneshot:
3971
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3972

    
3973
  node = instance.primary_node
3974

    
3975
  for dev in disks:
3976
    lu.cfg.SetDiskID(dev, node)
3977

    
3978
  # TODO: Convert to utils.Retry
3979

    
3980
  retries = 0
3981
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3982
  while True:
3983
    max_time = 0
3984
    done = True
3985
    cumul_degraded = False
3986
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3987
    msg = rstats.fail_msg
3988
    if msg:
3989
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3990
      retries += 1
3991
      if retries >= 10:
3992
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3993
                                 " aborting." % node)
3994
      time.sleep(6)
3995
      continue
3996
    rstats = rstats.payload
3997
    retries = 0
3998
    for i, mstat in enumerate(rstats):
3999
      if mstat is None:
4000
        lu.LogWarning("Can't compute data for node %s/%s",
4001
                           node, disks[i].iv_name)
4002
        continue
4003

    
4004
      cumul_degraded = (cumul_degraded or
4005
                        (mstat.is_degraded and mstat.sync_percent is None))
4006
      if mstat.sync_percent is not None:
4007
        done = False
4008
        if mstat.estimated_time is not None:
4009
          rem_time = ("%s remaining (estimated)" %
4010
                      utils.FormatSeconds(mstat.estimated_time))
4011
          max_time = mstat.estimated_time
4012
        else:
4013
          rem_time = "no time estimate"
4014
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4015
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
4016

    
4017
    # if we're done but degraded, let's do a few small retries, to
4018
    # make sure we see a stable and not transient situation; therefore
4019
    # we force restart of the loop
4020
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
4021
      logging.info("Degraded disks found, %d retries left", degr_retries)
4022
      degr_retries -= 1
4023
      time.sleep(1)
4024
      continue
4025

    
4026
    if done or oneshot:
4027
      break
4028

    
4029
    time.sleep(min(60, max_time))
4030

    
4031
  if done:
4032
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4033
  return not cumul_degraded
4034

    
4035

    
4036
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4037
  """Check that mirrors are not degraded.
4038

4039
  The ldisk parameter, if True, will change the test from the
4040
  is_degraded attribute (which represents overall non-ok status for
4041
  the device(s)) to the ldisk (representing the local storage status).
4042

4043
  """
4044
  lu.cfg.SetDiskID(dev, node)
4045

    
4046
  result = True
4047

    
4048
  if on_primary or dev.AssembleOnSecondary():
4049
    rstats = lu.rpc.call_blockdev_find(node, dev)
4050
    msg = rstats.fail_msg
4051
    if msg:
4052
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4053
      result = False
4054
    elif not rstats.payload:
4055
      lu.LogWarning("Can't find disk on node %s", node)
4056
      result = False
4057
    else:
4058
      if ldisk:
4059
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4060
      else:
4061
        result = result and not rstats.payload.is_degraded
4062

    
4063
  if dev.children:
4064
    for child in dev.children:
4065
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4066

    
4067
  return result
4068

    
4069

    
4070
class LUOobCommand(NoHooksLU):
4071
  """Logical unit for OOB handling.
4072

4073
  """
4074
  REG_BGL = False
4075
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4076

    
4077
  def ExpandNames(self):
4078
    """Gather locks we need.
4079

4080
    """
4081
    if self.op.node_names:
4082
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4083
      lock_names = self.op.node_names
4084
    else:
4085
      lock_names = locking.ALL_SET
4086

    
4087
    self.needed_locks = {
4088
      locking.LEVEL_NODE: lock_names,
4089
      }
4090

    
4091
  def CheckPrereq(self):
4092
    """Check prerequisites.
4093

4094
    This checks:
4095
     - the node exists in the configuration
4096
     - OOB is supported
4097

4098
    Any errors are signaled by raising errors.OpPrereqError.
4099

4100
    """
4101
    self.nodes = []
4102
    self.master_node = self.cfg.GetMasterNode()
4103

    
4104
    assert self.op.power_delay >= 0.0
4105

    
4106
    if self.op.node_names:
4107
      if (self.op.command in self._SKIP_MASTER and
4108
          self.master_node in self.op.node_names):
4109
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4110
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4111

    
4112
        if master_oob_handler:
4113
          additional_text = ("run '%s %s %s' if you want to operate on the"
4114
                             " master regardless") % (master_oob_handler,
4115
                                                      self.op.command,
4116
                                                      self.master_node)
4117
        else:
4118
          additional_text = "it does not support out-of-band operations"
4119

    
4120
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4121
                                    " allowed for %s; %s") %
4122
                                   (self.master_node, self.op.command,
4123
                                    additional_text), errors.ECODE_INVAL)
4124
    else:
4125
      self.op.node_names = self.cfg.GetNodeList()
4126
      if self.op.command in self._SKIP_MASTER:
4127
        self.op.node_names.remove(self.master_node)
4128

    
4129
    if self.op.command in self._SKIP_MASTER:
4130
      assert self.master_node not in self.op.node_names
4131

    
4132
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4133
      if node is None:
4134
        raise errors.OpPrereqError("Node %s not found" % node_name,
4135
                                   errors.ECODE_NOENT)
4136
      else:
4137
        self.nodes.append(node)
4138

    
4139
      if (not self.op.ignore_status and
4140
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4141
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4142
                                    " not marked offline") % node_name,
4143
                                   errors.ECODE_STATE)
4144

    
4145
  def Exec(self, feedback_fn):
4146
    """Execute OOB and return result if we expect any.
4147

4148
    """
4149
    master_node = self.master_node
4150
    ret = []
4151

    
4152
    for idx, node in enumerate(utils.NiceSort(self.nodes,
4153
                                              key=lambda node: node.name)):
4154
      node_entry = [(constants.RS_NORMAL, node.name)]
4155
      ret.append(node_entry)
4156

    
4157
      oob_program = _SupportsOob(self.cfg, node)
4158

    
4159
      if not oob_program:
4160
        node_entry.append((constants.RS_UNAVAIL, None))
4161
        continue
4162

    
4163
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
4164
                   self.op.command, oob_program, node.name)
4165
      result = self.rpc.call_run_oob(master_node, oob_program,
4166
                                     self.op.command, node.name,
4167
                                     self.op.timeout)
4168

    
4169
      if result.fail_msg:
4170
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4171
                        node.name, result.fail_msg)
4172
        node_entry.append((constants.RS_NODATA, None))
4173
      else:
4174
        try:
4175
          self._CheckPayload(result)
4176
        except errors.OpExecError, err:
4177
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
4178
                          node.name, err)
4179
          node_entry.append((constants.RS_NODATA, None))
4180
        else:
4181
          if self.op.command == constants.OOB_HEALTH:
4182
            # For health we should log important events
4183
            for item, status in result.payload:
4184
              if status in [constants.OOB_STATUS_WARNING,
4185
                            constants.OOB_STATUS_CRITICAL]:
4186
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
4187
                                item, node.name, status)
4188

    
4189
          if self.op.command == constants.OOB_POWER_ON:
4190
            node.powered = True
4191
          elif self.op.command == constants.OOB_POWER_OFF:
4192
            node.powered = False
4193
          elif self.op.command == constants.OOB_POWER_STATUS:
4194
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4195
            if powered != node.powered:
4196
              logging.warning(("Recorded power state (%s) of node '%s' does not"
4197
                               " match actual power state (%s)"), node.powered,
4198
                              node.name, powered)
4199

    
4200
          # For configuration changing commands we should update the node
4201
          if self.op.command in (constants.OOB_POWER_ON,
4202
                                 constants.OOB_POWER_OFF):
4203
            self.cfg.Update(node, feedback_fn)
4204

    
4205
          node_entry.append((constants.RS_NORMAL, result.payload))
4206

    
4207
          if (self.op.command == constants.OOB_POWER_ON and
4208
              idx < len(self.nodes) - 1):
4209
            time.sleep(self.op.power_delay)
4210

    
4211
    return ret
4212

    
4213
  def _CheckPayload(self, result):
4214
    """Checks if the payload is valid.
4215

4216
    @param result: RPC result
4217
    @raises errors.OpExecError: If payload is not valid
4218

4219
    """
4220
    errs = []
4221
    if self.op.command == constants.OOB_HEALTH:
4222
      if not isinstance(result.payload, list):
4223
        errs.append("command 'health' is expected to return a list but got %s" %
4224
                    type(result.payload))
4225
      else:
4226
        for item, status in result.payload:
4227
          if status not in constants.OOB_STATUSES:
4228
            errs.append("health item '%s' has invalid status '%s'" %
4229
                        (item, status))
4230

    
4231
    if self.op.command == constants.OOB_POWER_STATUS:
4232
      if not isinstance(result.payload, dict):
4233
        errs.append("power-status is expected to return a dict but got %s" %
4234
                    type(result.payload))
4235

    
4236
    if self.op.command in [
4237
        constants.OOB_POWER_ON,
4238
        constants.OOB_POWER_OFF,
4239
        constants.OOB_POWER_CYCLE,
4240
        ]:
4241
      if result.payload is not None:
4242
        errs.append("%s is expected to not return payload but got '%s'" %
4243
                    (self.op.command, result.payload))
4244

    
4245
    if errs:
4246
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4247
                               utils.CommaJoin(errs))
4248

    
4249
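# A minimal illustrative sketch (not wired into any LU): the payload shape
# that _CheckPayload above enforces for each out-of-band command. The helper
# name and the sample values in the comments are hypothetical; the shape
# rules themselves come from the checks above.
def _ExampleOobPayloadShape(command, payload):
  """Tells whether C{payload} has the shape _CheckPayload expects.

  """
  if command == constants.OOB_HEALTH:
    # e.g. [("fan0", constants.OOB_STATUS_WARNING), ...]
    if not isinstance(payload, list):
      return False
    for (_, status) in payload:
      if status not in constants.OOB_STATUSES:
        return False
    return True
  if command == constants.OOB_POWER_STATUS:
    # e.g. {constants.OOB_POWER_STATUS_POWERED: True}
    return isinstance(payload, dict)
  if command in (constants.OOB_POWER_ON, constants.OOB_POWER_OFF,
                 constants.OOB_POWER_CYCLE):
    # the power commands are expected to return no payload at all
    return payload is None
  return False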

    
4250
class _OsQuery(_QueryBase):
4251
  FIELDS = query.OS_FIELDS
4252

    
4253
  def ExpandNames(self, lu):
4254
    # Lock all nodes in shared mode
4255
    # Temporary removal of locks, should be reverted later
4256
    # TODO: reintroduce locks when they are lighter-weight
4257
    lu.needed_locks = {}
4258
    #self.share_locks[locking.LEVEL_NODE] = 1
4259
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4260

    
4261
    # The following variables interact with _QueryBase._GetNames
4262
    if self.names:
4263
      self.wanted = self.names
4264
    else:
4265
      self.wanted = locking.ALL_SET
4266

    
4267
    self.do_locking = self.use_locking
4268

    
4269
  def DeclareLocks(self, lu, level):
4270
    pass
4271

    
4272
  @staticmethod
4273
  def _DiagnoseByOS(rlist):
4274
    """Remaps a per-node return list into an a per-os per-node dictionary
4275

4276
    @param rlist: a map with node names as keys and the OS diagnose RPC
        results as values
4277

4278
    @rtype: dict
4279
    @return: a dictionary with osnames as keys and as value another
4280
        map, with nodes as keys and tuples of (path, status, diagnose,
4281
        variants, parameters, api_versions) as values, e.g.::
4282

4283
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4284
                                     (/srv/..., False, "invalid api")],
4285
                           "node2": [(/srv/..., True, "", [], [])]}
4286
          }
4287

4288
    """
4289
    all_os = {}
4290
    # we build here the list of nodes that didn't fail the RPC (at RPC
4291
    # level), so that nodes with a non-responding node daemon don't
4292
    # make all OSes invalid
4293
    good_nodes = [node_name for node_name in rlist
4294
                  if not rlist[node_name].fail_msg]
4295
    for node_name, nr in rlist.items():
4296
      if nr.fail_msg or not nr.payload:
4297
        continue
4298
      for (name, path, status, diagnose, variants,
4299
           params, api_versions) in nr.payload:
4300
        if name not in all_os:
4301
          # build a list of nodes for this os containing empty lists
4302
          # for each node in node_list
4303
          all_os[name] = {}
4304
          for nname in good_nodes:
4305
            all_os[name][nname] = []
4306
        # convert params from [name, help] to (name, help)
4307
        params = [tuple(v) for v in params]
4308
        all_os[name][node_name].append((path, status, diagnose,
4309
                                        variants, params, api_versions))
4310
    return all_os
4311

    
4312
  def _GetQueryData(self, lu):
4313
    """Computes the list of nodes and their attributes.
4314

4315
    """
4316
    # Locking is not used
4317
    assert not (compat.any(lu.glm.is_owned(level)
4318
                           for level in locking.LEVELS
4319
                           if level != locking.LEVEL_CLUSTER) or
4320
                self.do_locking or self.use_locking)
4321

    
4322
    valid_nodes = [node.name
4323
                   for node in lu.cfg.GetAllNodesInfo().values()
4324
                   if not node.offline and node.vm_capable]
4325
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4326
    cluster = lu.cfg.GetClusterInfo()
4327

    
4328
    data = {}
4329

    
4330
    for (os_name, os_data) in pol.items():
4331
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4332
                          hidden=(os_name in cluster.hidden_os),
4333
                          blacklisted=(os_name in cluster.blacklisted_os))
4334

    
4335
      variants = set()
4336
      parameters = set()
4337
      api_versions = set()
4338

    
4339
      for idx, osl in enumerate(os_data.values()):
4340
        info.valid = bool(info.valid and osl and osl[0][1])
4341
        if not info.valid:
4342
          break
4343

    
4344
        (node_variants, node_params, node_api) = osl[0][3:6]
4345
        if idx == 0:
4346
          # First entry
4347
          variants.update(node_variants)
4348
          parameters.update(node_params)
4349
          api_versions.update(node_api)
4350
        else:
4351
          # Filter out inconsistent values
4352
          variants.intersection_update(node_variants)
4353
          parameters.intersection_update(node_params)
4354
          api_versions.intersection_update(node_api)
4355

    
4356
      info.variants = list(variants)
4357
      info.parameters = list(parameters)
4358
      info.api_versions = list(api_versions)
4359

    
4360
      data[os_name] = info
4361

    
4362
    # Prepare data in requested order
4363
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4364
            if name in data]
4365

    
4366
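# A minimal illustrative sketch (not wired into any LU): how
# _OsQuery._GetQueryData above merges per-node OS data. The first node seeds
# the result set and every further node intersects it, so only values
# reported consistently by all nodes survive. The helper name and the sample
# variant names are hypothetical.
def _ExampleMergeOsVariants(per_node_variants):
  """Merges per-node variant lists as _GetQueryData does above.

  @param per_node_variants: list of variant lists, one entry per node
  @rtype: set

  """
  variants = set()
  for idx, node_variants in enumerate(per_node_variants):
    if idx == 0:
      variants.update(node_variants)
    else:
      variants.intersection_update(node_variants)
  return variants

# For example, _ExampleMergeOsVariants([["default", "minimal"], ["default"]])
# keeps only "default".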

    
4367
class LUOsDiagnose(NoHooksLU):
4368
  """Logical unit for OS diagnose/query.
4369

4370
  """
4371
  REQ_BGL = False
4372

    
4373
  @staticmethod
4374
  def _BuildFilter(fields, names):
4375
    """Builds a filter for querying OSes.
4376

4377
    """
4378
    name_filter = qlang.MakeSimpleFilter("name", names)
4379

    
4380
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4381
    # respective field is not requested
4382
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4383
                     for fname in ["hidden", "blacklisted"]
4384
                     if fname not in fields]
4385
    if "valid" not in fields:
4386
      status_filter.append([qlang.OP_TRUE, "valid"])
4387

    
4388
    if status_filter:
4389
      status_filter.insert(0, qlang.OP_AND)
4390
    else:
4391
      status_filter = None
4392

    
4393
    if name_filter and status_filter:
4394
      return [qlang.OP_AND, name_filter, status_filter]
4395
    elif name_filter:
4396
      return name_filter
4397
    else:
4398
      return status_filter
4399

    
4400
  def CheckArguments(self):
4401
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4402
                       self.op.output_fields, False)
4403

    
4404
  def ExpandNames(self):
4405
    self.oq.ExpandNames(self)
4406

    
4407
  def Exec(self, feedback_fn):
4408
    return self.oq.OldStyleQuery(self)
4409

    
4410
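# A minimal illustrative sketch (not wired into any LU): the legacy status
# filter that LUOsDiagnose._BuildFilter above produces when none of "hidden",
# "blacklisted" or "valid" is among the requested fields. The helper name is
# hypothetical; the operators are the same qlang constants used above.
def _ExampleDefaultOsStatusFilter():
  """Returns the filter hiding hidden, blacklisted and invalid OSes.

  """
  return [qlang.OP_AND,
          [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
          [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
          [qlang.OP_TRUE, "valid"]]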

    
4411
class LUNodeRemove(LogicalUnit):
4412
  """Logical unit for removing a node.
4413

4414
  """
4415
  HPATH = "node-remove"
4416
  HTYPE = constants.HTYPE_NODE
4417

    
4418
  def BuildHooksEnv(self):
4419
    """Build hooks env.
4420

4421
    This doesn't run on the target node in the pre phase as a failed
4422
    node would then be impossible to remove.
4423

4424
    """
4425
    return {
4426
      "OP_TARGET": self.op.node_name,
4427
      "NODE_NAME": self.op.node_name,
4428
      }
4429

    
4430
  def BuildHooksNodes(self):
4431
    """Build hooks nodes.
4432

4433
    """
4434
    all_nodes = self.cfg.GetNodeList()
4435
    try:
4436
      all_nodes.remove(self.op.node_name)
4437
    except ValueError:
4438
      logging.warning("Node '%s', which is about to be removed, was not found"
4439
                      " in the list of all nodes", self.op.node_name)
4440
    return (all_nodes, all_nodes)
4441

    
4442
  def CheckPrereq(self):
4443
    """Check prerequisites.
4444

4445
    This checks:
4446
     - the node exists in the configuration
4447
     - it does not have primary or secondary instances
4448
     - it's not the master
4449

4450
    Any errors are signaled by raising errors.OpPrereqError.
4451

4452
    """
4453
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4454
    node = self.cfg.GetNodeInfo(self.op.node_name)
4455
    assert node is not None
4456

    
4457
    masternode = self.cfg.GetMasterNode()
4458
    if node.name == masternode:
4459
      raise errors.OpPrereqError("Node is the master node, failover to another"
4460
                                 " node is required", errors.ECODE_INVAL)
4461

    
4462
    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4463
      if node.name in instance.all_nodes:
4464
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4465
                                   " please remove first" % instance_name,
4466
                                   errors.ECODE_INVAL)
4467
    self.op.node_name = node.name
4468
    self.node = node
4469

    
4470
  def Exec(self, feedback_fn):
4471
    """Removes the node from the cluster.
4472

4473
    """
4474
    node = self.node
4475
    logging.info("Stopping the node daemon and removing configs from node %s",
4476
                 node.name)
4477

    
4478
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4479

    
4480
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4481
      "Not owning BGL"
4482

    
4483
    # Promote nodes to master candidate as needed
4484
    _AdjustCandidatePool(self, exceptions=[node.name])
4485
    self.context.RemoveNode(node.name)
4486

    
4487
    # Run post hooks on the node before it's removed
4488
    _RunPostHook(self, node.name)
4489

    
4490
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4491
    msg = result.fail_msg
4492
    if msg:
4493
      self.LogWarning("Errors encountered on the remote node while leaving"
4494
                      " the cluster: %s", msg)
4495

    
4496
    # Remove node from our /etc/hosts
4497
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4498
      master_node = self.cfg.GetMasterNode()
4499
      result = self.rpc.call_etc_hosts_modify(master_node,
4500
                                              constants.ETC_HOSTS_REMOVE,
4501
                                              node.name, None)
4502
      result.Raise("Can't update hosts file with new host data")
4503
      _RedistributeAncillaryFiles(self)
4504

    
4505

    
4506
class _NodeQuery(_QueryBase):
4507
  FIELDS = query.NODE_FIELDS
4508

    
4509
  def ExpandNames(self, lu):
4510
    lu.needed_locks = {}
4511
    lu.share_locks = _ShareAll()
4512

    
4513
    if self.names:
4514
      self.wanted = _GetWantedNodes(lu, self.names)
4515
    else:
4516
      self.wanted = locking.ALL_SET
4517

    
4518
    self.do_locking = (self.use_locking and
4519
                       query.NQ_LIVE in self.requested_data)
4520

    
4521
    if self.do_locking:
4522
      # If any non-static field is requested we need to lock the nodes
4523
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4524

    
4525
  def DeclareLocks(self, lu, level):
4526
    pass
4527

    
4528
  def _GetQueryData(self, lu):
4529
    """Computes the list of nodes and their attributes.
4530

4531
    """
4532
    all_info = lu.cfg.GetAllNodesInfo()
4533

    
4534
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4535

    
4536
    # Gather data as requested
4537
    if query.NQ_LIVE in self.requested_data:
4538
      # filter out non-vm_capable nodes
4539
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4540

    
4541
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4542
                                        lu.cfg.GetHypervisorType())
4543
      live_data = dict((name, nresult.payload)
4544
                       for (name, nresult) in node_data.items()
4545
                       if not nresult.fail_msg and nresult.payload)
4546
    else:
4547
      live_data = None
4548

    
4549
    if query.NQ_INST in self.requested_data:
4550
      node_to_primary = dict([(name, set()) for name in nodenames])
4551
      node_to_secondary = dict([(name, set()) for name in nodenames])
4552

    
4553
      inst_data = lu.cfg.GetAllInstancesInfo()
4554

    
4555
      for inst in inst_data.values():
4556
        if inst.primary_node in node_to_primary:
4557
          node_to_primary[inst.primary_node].add(inst.name)
4558
        for secnode in inst.secondary_nodes:
4559
          if secnode in node_to_secondary:
4560
            node_to_secondary[secnode].add(inst.name)
4561
    else:
4562
      node_to_primary = None
4563
      node_to_secondary = None
4564

    
4565
    if query.NQ_OOB in self.requested_data:
4566
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4567
                         for name, node in all_info.iteritems())
4568
    else:
4569
      oob_support = None
4570

    
4571
    if query.NQ_GROUP in self.requested_data:
4572
      groups = lu.cfg.GetAllNodeGroupsInfo()
4573
    else:
4574
      groups = {}
4575

    
4576
    return query.NodeQueryData([all_info[name] for name in nodenames],
4577
                               live_data, lu.cfg.GetMasterNode(),
4578
                               node_to_primary, node_to_secondary, groups,
4579
                               oob_support, lu.cfg.GetClusterInfo())
4580

    
4581
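# A minimal illustrative sketch (not wired into any LU): how
# _NodeQuery._GetQueryData above builds the per-node instance sets when
# NQ_INST data is requested. The helper name is hypothetical; the logic
# mirrors the loop above.
def _ExampleMapNodesToInstances(nodenames, instances):
  """Maps node names to the sets of primary/secondary instances on them.

  @param nodenames: list of node names to consider
  @param instances: iterable of L{objects.Instance}

  """
  node_to_primary = dict((name, set()) for name in nodenames)
  node_to_secondary = dict((name, set()) for name in nodenames)
  for inst in instances:
    if inst.primary_node in node_to_primary:
      node_to_primary[inst.primary_node].add(inst.name)
    for secnode in inst.secondary_nodes:
      if secnode in node_to_secondary:
        node_to_secondary[secnode].add(inst.name)
  return (node_to_primary, node_to_secondary)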

    
4582
class LUNodeQuery(NoHooksLU):
4583
  """Logical unit for querying nodes.
4584

4585
  """
4586
  # pylint: disable=W0142
4587
  REQ_BGL = False
4588

    
4589
  def CheckArguments(self):
4590
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4591
                         self.op.output_fields, self.op.use_locking)
4592

    
4593
  def ExpandNames(self):
4594
    self.nq.ExpandNames(self)
4595

    
4596
  def Exec(self, feedback_fn):
4597
    return self.nq.OldStyleQuery(self)
4598

    
4599

    
4600
class LUNodeQueryvols(NoHooksLU):
4601
  """Logical unit for getting volumes on node(s).
4602

4603
  """
4604
  REQ_BGL = False
4605
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4606
  _FIELDS_STATIC = utils.FieldSet("node")
4607

    
4608
  def CheckArguments(self):
4609
    _CheckOutputFields(static=self._FIELDS_STATIC,
4610
                       dynamic=self._FIELDS_DYNAMIC,
4611
                       selected=self.op.output_fields)
4612

    
4613
  def ExpandNames(self):
4614
    self.needed_locks = {}
4615
    self.share_locks[locking.LEVEL_NODE] = 1
4616
    if not self.op.nodes:
4617
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4618
    else:
4619
      self.needed_locks[locking.LEVEL_NODE] = \
4620
        _GetWantedNodes(self, self.op.nodes)
4621

    
4622
  def Exec(self, feedback_fn):
4623
    """Computes the list of nodes and their attributes.
4624

4625
    """
4626
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4627
    volumes = self.rpc.call_node_volumes(nodenames)
4628

    
4629
    ilist = self.cfg.GetAllInstancesInfo()
4630
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4631

    
4632
    output = []
4633
    for node in nodenames:
4634
      nresult = volumes[node]
4635
      if nresult.offline:
4636
        continue
4637
      msg = nresult.fail_msg
4638
      if msg:
4639
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4640
        continue
4641

    
4642
      node_vols = sorted(nresult.payload,
4643
                         key=operator.itemgetter("dev"))
4644

    
4645
      for vol in node_vols:
4646
        node_output = []
4647
        for field in self.op.output_fields:
4648
          if field == "node":
4649
            val = node
4650
          elif field == "phys":
4651
            val = vol["dev"]
4652
          elif field == "vg":
4653
            val = vol["vg"]
4654
          elif field == "name":
4655
            val = vol["name"]
4656
          elif field == "size":
4657
            val = int(float(vol["size"]))
4658
          elif field == "instance":
4659
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4660
          else:
4661
            raise errors.ParameterError(field)
4662
          node_output.append(str(val))
4663

    
4664
        output.append(node_output)
4665

    
4666
    return output
4667

    
4668
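# A minimal illustrative sketch (not wired into any LU): the lookup key used
# by LUNodeQueryvols.Exec above to map a reported volume back to its
# instance. _MapInstanceDisksToNodes indexes disks by (node, "vg/lv"), which
# is why the "instance" field is resolved via vol["vg"] + "/" + vol["name"].
# The helper name and the sample values are hypothetical.
def _ExampleVolumeKey(node, vol):
  """Builds the (node, logical volume) key used for the instance lookup.

  """
  return (node, vol["vg"] + "/" + vol["name"])

# For example, _ExampleVolumeKey("node1.example.com",
#                                {"vg": "xenvg", "name": "disk0"})
# yields ("node1.example.com", "xenvg/disk0").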

    
4669
class LUNodeQueryStorage(NoHooksLU):
4670
  """Logical unit for getting information on storage units on node(s).
4671

4672
  """
4673
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4674
  REQ_BGL = False
4675

    
4676
  def CheckArguments(self):
4677
    _CheckOutputFields(static=self._FIELDS_STATIC,
4678
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4679
                       selected=self.op.output_fields)
4680

    
4681
  def ExpandNames(self):
4682
    self.needed_locks = {}
4683
    self.share_locks[locking.LEVEL_NODE] = 1
4684

    
4685
    if self.op.nodes:
4686
      self.needed_locks[locking.LEVEL_NODE] = \
4687
        _GetWantedNodes(self, self.op.nodes)
4688
    else:
4689
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4690

    
4691
  def Exec(self, feedback_fn):
4692
    """Computes the list of nodes and their attributes.
4693

4694
    """
4695
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4696

    
4697
    # Always get name to sort by
4698
    if constants.SF_NAME in self.op.output_fields:
4699
      fields = self.op.output_fields[:]
4700
    else:
4701
      fields = [constants.SF_NAME] + self.op.output_fields
4702

    
4703
    # Never ask for node or type as it's only known to the LU
4704
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4705
      while extra in fields:
4706
        fields.remove(extra)
4707

    
4708
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4709
    name_idx = field_idx[constants.SF_NAME]
4710

    
4711
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4712
    data = self.rpc.call_storage_list(self.nodes,
4713
                                      self.op.storage_type, st_args,
4714
                                      self.op.name, fields)
4715

    
4716
    result = []
4717

    
4718
    for node in utils.NiceSort(self.nodes):
4719
      nresult = data[node]
4720
      if nresult.offline:
4721
        continue
4722

    
4723
      msg = nresult.fail_msg
4724
      if msg:
4725
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4726
        continue
4727

    
4728
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4729

    
4730
      for name in utils.NiceSort(rows.keys()):
4731
        row = rows[name]
4732

    
4733
        out = []
4734

    
4735
        for field in self.op.output_fields:
4736
          if field == constants.SF_NODE:
4737
            val = node
4738
          elif field == constants.SF_TYPE:
4739
            val = self.op.storage_type
4740
          elif field in field_idx:
4741
            val = row[field_idx[field]]
4742
          else:
4743
            raise errors.ParameterError(field)
4744

    
4745
          out.append(val)
4746

    
4747
        result.append(out)
4748

    
4749
    return result
4750

    
4751
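# A minimal illustrative sketch (not wired into any LU): how
# LUNodeQueryStorage.Exec above assembles a single output row. The node and
# type fields are filled in locally, everything else is taken from the RPC
# row via the field index. The helper name is hypothetical.
def _ExampleBuildStorageRow(node, storage_type, output_fields, field_idx,
                            row):
  """Builds one result row from a storage_list RPC row.

  """
  out = []
  for field in output_fields:
    if field == constants.SF_NODE:
      out.append(node)
    elif field == constants.SF_TYPE:
      out.append(storage_type)
    elif field in field_idx:
      out.append(row[field_idx[field]])
    else:
      raise errors.ParameterError(field)
  return out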

    
4752
class _InstanceQuery(_QueryBase):
4753
  FIELDS = query.INSTANCE_FIELDS
4754

    
4755
  def ExpandNames(self, lu):
4756
    lu.needed_locks = {}
4757
    lu.share_locks = _ShareAll()
4758

    
4759
    if self.names:
4760
      self.wanted = _GetWantedInstances(lu, self.names)
4761
    else:
4762
      self.wanted = locking.ALL_SET
4763

    
4764
    self.do_locking = (self.use_locking and
4765
                       query.IQ_LIVE in self.requested_data)
4766
    if self.do_locking:
4767
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4768
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4769
      lu.needed_locks[locking.LEVEL_NODE] = []
4770
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4771

    
4772
    self.do_grouplocks = (self.do_locking and
4773
                          query.IQ_NODES in self.requested_data)
4774

    
4775
  def DeclareLocks(self, lu, level):
4776
    if self.do_locking:
4777
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4778
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4779

    
4780
        # Lock all groups used by instances optimistically; this requires going
4781
        # via the node before it's locked, requiring verification later on
4782
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4783
          set(group_uuid
4784
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4785
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4786
      elif level == locking.LEVEL_NODE:
4787
        lu._LockInstancesNodes() # pylint: disable=W0212
4788

    
4789
  @staticmethod
4790
  def _CheckGroupLocks(lu):
4791
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4792
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4793

    
4794
    # Check if node groups for locked instances are still correct
4795
    for instance_name in owned_instances:
4796
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4797

    
4798
  def _GetQueryData(self, lu):
4799
    """Computes the list of instances and their attributes.
4800

4801
    """
4802
    if self.do_grouplocks:
4803
      self._CheckGroupLocks(lu)
4804

    
4805
    cluster = lu.cfg.GetClusterInfo()
4806
    all_info = lu.cfg.GetAllInstancesInfo()
4807

    
4808
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4809

    
4810
    instance_list = [all_info[name] for name in instance_names]
4811
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4812
                                        for inst in instance_list)))
4813
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4814
    bad_nodes = []
4815
    offline_nodes = []
4816
    wrongnode_inst = set()
4817

    
4818
    # Gather data as requested
4819
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4820
      live_data = {}
4821
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4822
      for name in nodes:
4823
        result = node_data[name]
4824
        if result.offline:
4825
          # offline nodes will be in both lists
4826
          assert result.fail_msg
4827
          offline_nodes.append(name)
4828
        if result.fail_msg:
4829
          bad_nodes.append(name)
4830
        elif result.payload:
4831
          for inst in result.payload:
4832
            if inst in all_info:
4833
              if all_info[inst].primary_node == name:
4834
                live_data.update(result.payload)
4835
              else:
4836
                wrongnode_inst.add(inst)
4837
            else:
4838
              # orphan instance; we don't list it here as we don't
4839
              # handle this case yet in the output of instance listing
4840
              logging.warning("Orphan instance '%s' found on node %s",
4841
                              inst, name)
4842
        # else no instance is alive
4843
    else:
4844
      live_data = {}
4845

    
4846
    if query.IQ_DISKUSAGE in self.requested_data:
4847
      disk_usage = dict((inst.name,
4848
                         _ComputeDiskSize(inst.disk_template,
4849
                                          [{constants.IDISK_SIZE: disk.size}
4850
                                           for disk in inst.disks]))
4851
                        for inst in instance_list)
4852
    else:
4853
      disk_usage = None
4854

    
4855
    if query.IQ_CONSOLE in self.requested_data:
4856
      consinfo = {}
4857
      for inst in instance_list:
4858
        if inst.name in live_data:
4859
          # Instance is running
4860
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4861
        else:
4862
          consinfo[inst.name] = None
4863
      assert set(consinfo.keys()) == set(instance_names)
4864
    else:
4865
      consinfo = None
4866

    
4867
    if query.IQ_NODES in self.requested_data:
4868
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4869
                                            instance_list)))
4870
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4871
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4872
                    for uuid in set(map(operator.attrgetter("group"),
4873
                                        nodes.values())))
4874
    else:
4875
      nodes = None
4876
      groups = None
4877

    
4878
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4879
                                   disk_usage, offline_nodes, bad_nodes,
4880
                                   live_data, wrongnode_inst, consinfo,
4881
                                   nodes, groups)
4882

    
4883

    
4884
class LUQuery(NoHooksLU):
4885
  """Query for resources/items of a certain kind.
4886

4887
  """
4888
  # pylint: disable=W0142
4889
  REQ_BGL = False
4890

    
4891
  def CheckArguments(self):
4892
    qcls = _GetQueryImplementation(self.op.what)
4893

    
4894
    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4895

    
4896
  def ExpandNames(self):
4897
    self.impl.ExpandNames(self)
4898

    
4899
  def DeclareLocks(self, level):
4900
    self.impl.DeclareLocks(self, level)
4901

    
4902
  def Exec(self, feedback_fn):
4903
    return self.impl.NewStyleQuery(self)
4904

    
4905

    
4906
class LUQueryFields(NoHooksLU):
4907
  """Query for resources/items of a certain kind.
4908

4909
  """
4910
  # pylint: disable=W0142
4911
  REQ_BGL = False
4912

    
4913
  def CheckArguments(self):
4914
    self.qcls = _GetQueryImplementation(self.op.what)
4915

    
4916
  def ExpandNames(self):
4917
    self.needed_locks = {}
4918

    
4919
  def Exec(self, feedback_fn):
4920
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4921

    
4922

    
4923
class LUNodeModifyStorage(NoHooksLU):
4924
  """Logical unit for modifying a storage volume on a node.
4925

4926
  """
4927
  REQ_BGL = False
4928

    
4929
  def CheckArguments(self):
4930
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4931

    
4932
    storage_type = self.op.storage_type
4933

    
4934
    try:
4935
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4936
    except KeyError:
4937
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4938
                                 " modified" % storage_type,
4939
                                 errors.ECODE_INVAL)
4940

    
4941
    diff = set(self.op.changes.keys()) - modifiable
4942
    if diff:
4943
      raise errors.OpPrereqError("The following fields can not be modified for"
4944
                                 " storage units of type '%s': %r" %
4945
                                 (storage_type, list(diff)),
4946
                                 errors.ECODE_INVAL)
4947

    
4948
  def ExpandNames(self):
4949
    self.needed_locks = {
4950
      locking.LEVEL_NODE: self.op.node_name,
4951
      }
4952

    
4953
  def Exec(self, feedback_fn):
4954
    """Computes the list of nodes and their attributes.
4955

4956
    """
4957
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4958
    result = self.rpc.call_storage_modify(self.op.node_name,
4959
                                          self.op.storage_type, st_args,
4960
                                          self.op.name, self.op.changes)
4961
    result.Raise("Failed to modify storage unit '%s' on %s" %
4962
                 (self.op.name, self.op.node_name))
4963

    
4964
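# A minimal illustrative sketch (not wired into any LU): the validation done
# by LUNodeModifyStorage.CheckArguments above. Only the fields listed in
# constants.MODIFIABLE_STORAGE_FIELDS for the given storage type may be
# changed; everything else is rejected. The helper name is hypothetical.
def _ExampleUnmodifiableStorageFields(storage_type, changes):
  """Returns the fields in C{changes} that may not be modified.

  """
  modifiable = constants.MODIFIABLE_STORAGE_FIELDS.get(storage_type,
                                                       frozenset())
  return set(changes.keys()) - modifiable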

    
4965
class LUNodeAdd(LogicalUnit):
4966
  """Logical unit for adding node to the cluster.
4967

4968
  """
4969
  HPATH = "node-add"
4970
  HTYPE = constants.HTYPE_NODE
4971
  _NFLAGS = ["master_capable", "vm_capable"]
4972

    
4973
  def CheckArguments(self):
4974
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4975
    # validate/normalize the node name
4976
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4977
                                         family=self.primary_ip_family)
4978
    self.op.node_name = self.hostname.name
4979

    
4980
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4981
      raise errors.OpPrereqError("Cannot readd the master node",
4982
                                 errors.ECODE_STATE)
4983

    
4984
    if self.op.readd and self.op.group:
4985
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4986
                                 " being readded", errors.ECODE_INVAL)
4987

    
4988
  def BuildHooksEnv(self):
4989
    """Build hooks env.
4990

4991
    This will run on all nodes before, and on all nodes + the new node after.
4992

4993
    """
4994
    return {
4995
      "OP_TARGET": self.op.node_name,
4996
      "NODE_NAME": self.op.node_name,
4997
      "NODE_PIP": self.op.primary_ip,
4998
      "NODE_SIP": self.op.secondary_ip,
4999
      "MASTER_CAPABLE": str(self.op.master_capable),
5000
      "VM_CAPABLE": str(self.op.vm_capable),
5001
      }
5002

    
5003
  def BuildHooksNodes(self):
5004
    """Build hooks nodes.
5005

5006
    """
5007
    # Exclude added node
5008
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5009
    post_nodes = pre_nodes + [self.op.node_name, ]
5010

    
5011
    return (pre_nodes, post_nodes)
5012

    
5013
  def CheckPrereq(self):
5014
    """Check prerequisites.
5015

5016
    This checks:
5017
     - the new node is not already in the config
5018
     - it is resolvable
5019
     - its parameters (single/dual homed) matches the cluster
5020

5021
    Any errors are signaled by raising errors.OpPrereqError.
5022

5023
    """
5024
    cfg = self.cfg
5025
    hostname = self.hostname
5026
    node = hostname.name
5027
    primary_ip = self.op.primary_ip = hostname.ip
5028
    if self.op.secondary_ip is None:
5029
      if self.primary_ip_family == netutils.IP6Address.family:
5030
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5031
                                   " IPv4 address must be given as secondary",
5032
                                   errors.ECODE_INVAL)
5033
      self.op.secondary_ip = primary_ip
5034

    
5035
    secondary_ip = self.op.secondary_ip
5036
    if not netutils.IP4Address.IsValid(secondary_ip):
5037
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5038
                                 " address" % secondary_ip, errors.ECODE_INVAL)
5039

    
5040
    node_list = cfg.GetNodeList()
5041
    if not self.op.readd and node in node_list:
5042
      raise errors.OpPrereqError("Node %s is already in the configuration" %
5043
                                 node, errors.ECODE_EXISTS)
5044
    elif self.op.readd and node not in node_list:
5045
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5046
                                 errors.ECODE_NOENT)
5047

    
5048
    self.changed_primary_ip = False
5049

    
5050
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5051
      if self.op.readd and node == existing_node_name:
5052
        if existing_node.secondary_ip != secondary_ip:
5053
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
5054
                                     " address configuration as before",
5055
                                     errors.ECODE_INVAL)
5056
        if existing_node.primary_ip != primary_ip:
5057
          self.changed_primary_ip = True
5058

    
5059
        continue
5060

    
5061
      if (existing_node.primary_ip == primary_ip or
5062
          existing_node.secondary_ip == primary_ip or
5063
          existing_node.primary_ip == secondary_ip or
5064
          existing_node.secondary_ip == secondary_ip):
5065
        raise errors.OpPrereqError("New node ip address(es) conflict with"
5066
                                   " existing node %s" % existing_node.name,
5067
                                   errors.ECODE_NOTUNIQUE)
5068

    
5069
    # After this 'if' block, None is no longer a valid value for the
5070
    # _capable op attributes
5071
    if self.op.readd:
5072
      old_node = self.cfg.GetNodeInfo(node)
5073
      assert old_node is not None, "Can't retrieve locked node %s" % node
5074
      for attr in self._NFLAGS:
5075
        if getattr(self.op, attr) is None:
5076
          setattr(self.op, attr, getattr(old_node, attr))
5077
    else:
5078
      for attr in self._NFLAGS:
5079
        if getattr(self.op, attr) is None:
5080
          setattr(self.op, attr, True)
5081

    
5082
    if self.op.readd and not self.op.vm_capable:
5083
      pri, sec = cfg.GetNodeInstances(node)
5084
      if pri or sec:
5085
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5086
                                   " flag set to false, but it already holds"
5087
                                   " instances" % node,
5088
                                   errors.ECODE_STATE)
5089

    
5090
    # check that the type of the node (single versus dual homed) is the
5091
    # same as for the master
5092
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5093
    master_singlehomed = myself.secondary_ip == myself.primary_ip
5094
    newbie_singlehomed = secondary_ip == primary_ip
5095
    if master_singlehomed != newbie_singlehomed:
5096
      if master_singlehomed:
5097
        raise errors.OpPrereqError("The master has no secondary ip but the"
5098
                                   " new node has one",
5099
                                   errors.ECODE_INVAL)
5100
      else:
5101
        raise errors.OpPrereqError("The master has a secondary ip but the"
5102
                                   " new node doesn't have one",
5103
                                   errors.ECODE_INVAL)
5104

    
5105
    # checks reachability
5106
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5107
      raise errors.OpPrereqError("Node not reachable by ping",
5108
                                 errors.ECODE_ENVIRON)
5109

    
5110
    if not newbie_singlehomed:
5111
      # check reachability from my secondary ip to newbie's secondary ip
5112
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5113
                           source=myself.secondary_ip):
5114
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5115
                                   " based ping to node daemon port",
5116
                                   errors.ECODE_ENVIRON)
5117

    
5118
    if self.op.readd:
5119
      exceptions = [node]
5120
    else:
5121
      exceptions = []
5122

    
5123
    if self.op.master_capable:
5124
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5125
    else:
5126
      self.master_candidate = False
5127

    
5128
    if self.op.readd:
5129
      self.new_node = old_node
5130
    else:
5131
      node_group = cfg.LookupNodeGroup(self.op.group)
5132
      self.new_node = objects.Node(name=node,
5133
                                   primary_ip=primary_ip,
5134
                                   secondary_ip=secondary_ip,
5135
                                   master_candidate=self.master_candidate,
5136
                                   offline=False, drained=False,
5137
                                   group=node_group)
5138

    
5139
    if self.op.ndparams:
5140
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5141

    
5142
  def Exec(self, feedback_fn):
5143
    """Adds the new node to the cluster.
5144

5145
    """
5146
    new_node = self.new_node
5147
    node = new_node.name
5148

    
5149
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5150
      "Not owning BGL"
5151

    
5152
    # We are adding a new node, so we assume it's powered
5153
    new_node.powered = True
5154

    
5155
    # for re-adds, reset the offline/drained/master-candidate flags;
5156
    # we need to reset here, otherwise offline would prevent RPC calls
5157
    # later in the procedure; this also means that if the re-add
5158
    # fails, we are left with a non-offlined, broken node
5159
    if self.op.readd:
5160
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5161
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5162
      # if we demote the node, we do cleanup later in the procedure
5163
      new_node.master_candidate = self.master_candidate
5164
      if self.changed_primary_ip:
5165
        new_node.primary_ip = self.op.primary_ip
5166

    
5167
    # copy the master/vm_capable flags
5168
    for attr in self._NFLAGS:
5169
      setattr(new_node, attr, getattr(self.op, attr))
5170

    
5171
    # notify the user about any possible mc promotion
5172
    if new_node.master_candidate:
5173
      self.LogInfo("Node will be a master candidate")
5174

    
5175
    if self.op.ndparams:
5176
      new_node.ndparams = self.op.ndparams
5177
    else:
5178
      new_node.ndparams = {}
5179

    
5180
    # check connectivity
5181
    result = self.rpc.call_version([node])[node]
5182
    result.Raise("Can't get version information from node %s" % node)
5183
    if constants.PROTOCOL_VERSION == result.payload:
5184
      logging.info("Communication to node %s fine, sw version %s match",
5185
                   node, result.payload)
5186
    else:
5187
      raise errors.OpExecError("Version mismatch master version %s,"
5188
                               " node version %s" %
5189
                               (constants.PROTOCOL_VERSION, result.payload))
5190

    
5191
    # Add node to our /etc/hosts, and add key to known_hosts
5192
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5193
      master_node = self.cfg.GetMasterNode()
5194
      result = self.rpc.call_etc_hosts_modify(master_node,
5195
                                              constants.ETC_HOSTS_ADD,
5196
                                              self.hostname.name,
5197
                                              self.hostname.ip)
5198
      result.Raise("Can't update hosts file with new host data")
5199

    
5200
    if new_node.secondary_ip != new_node.primary_ip:
5201
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5202
                               False)
5203

    
5204
    node_verify_list = [self.cfg.GetMasterNode()]
5205
    node_verify_param = {
5206
      constants.NV_NODELIST: ([node], {}),
5207
      # TODO: do a node-net-test as well?
5208
    }
5209

    
5210
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5211
                                       self.cfg.GetClusterName())
5212
    for verifier in node_verify_list:
5213
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5214
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5215
      if nl_payload:
5216
        for failed in nl_payload:
5217
          feedback_fn("ssh/hostname verification failed"
5218
                      " (checking from %s): %s" %
5219
                      (verifier, nl_payload[failed]))
5220
        raise errors.OpExecError("ssh/hostname verification failed")
5221

    
5222
    if self.op.readd:
5223
      _RedistributeAncillaryFiles(self)
5224
      self.context.ReaddNode(new_node)
5225
      # make sure we redistribute the config
5226
      self.cfg.Update(new_node, feedback_fn)
5227
      # and make sure the new node will not have old files around
5228
      if not new_node.master_candidate:
5229
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5230
        msg = result.fail_msg
5231
        if msg:
5232
          self.LogWarning("Node failed to demote itself from master"
5233
                          " candidate status: %s" % msg)
5234
    else:
5235
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5236
                                  additional_vm=self.op.vm_capable)
5237
      self.context.AddNode(new_node, self.proc.GetECId())
5238

    
5239
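# A minimal illustrative sketch (not wired into any LU): the single- versus
# dual-homed check done in LUNodeAdd.CheckPrereq above. A node counts as
# single-homed when its secondary IP equals its primary IP, and a new node
# must match the master in this respect. The helper name is hypothetical.
def _ExampleHomednessMatches(master_primary, master_secondary,
                             new_primary, new_secondary):
  """Tells whether a new node's homedness matches the master's.

  """
  master_singlehomed = master_secondary == master_primary
  newbie_singlehomed = new_secondary == new_primary
  return master_singlehomed == newbie_singlehomed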

    
5240
class LUNodeSetParams(LogicalUnit):
5241
  """Modifies the parameters of a node.
5242

5243
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5244
      to the node role (as _ROLE_*)
5245
  @cvar _R2F: a dictionary from node role to tuples of flags
5246
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5247

5248
  """
5249
  HPATH = "node-modify"
5250
  HTYPE = constants.HTYPE_NODE
5251
  REQ_BGL = False
5252
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5253
  _F2R = {
5254
    (True, False, False): _ROLE_CANDIDATE,
5255
    (False, True, False): _ROLE_DRAINED,
5256
    (False, False, True): _ROLE_OFFLINE,
5257
    (False, False, False): _ROLE_REGULAR,
5258
    }
5259
  _R2F = dict((v, k) for k, v in _F2R.items())
5260
  _FLAGS = ["master_candidate", "drained", "offline"]
5261

    
5262
  def CheckArguments(self):
5263
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5264
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5265
                self.op.master_capable, self.op.vm_capable,
5266
                self.op.secondary_ip, self.op.ndparams]
5267
    if all_mods.count(None) == len(all_mods):
5268
      raise errors.OpPrereqError("Please pass at least one modification",
5269
                                 errors.ECODE_INVAL)
5270
    if all_mods.count(True) > 1:
5271
      raise errors.OpPrereqError("Can't set the node into more than one"
5272
                                 " state at the same time",
5273
                                 errors.ECODE_INVAL)
5274

    
5275
    # Boolean value that tells us whether we might be demoting from MC
5276
    self.might_demote = (self.op.master_candidate == False or
5277
                         self.op.offline == True or
5278
                         self.op.drained == True or
5279
                         self.op.master_capable == False)
5280

    
5281
    if self.op.secondary_ip:
5282
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5283
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5284
                                   " address" % self.op.secondary_ip,
5285
                                   errors.ECODE_INVAL)
5286

    
5287
    self.lock_all = self.op.auto_promote and self.might_demote
5288
    self.lock_instances = self.op.secondary_ip is not None
5289

    
5290
  def _InstanceFilter(self, instance):
5291
    """Filter for getting affected instances.
5292

5293
    """
5294
    return (instance.disk_template in constants.DTS_INT_MIRROR and
5295
            self.op.node_name in instance.all_nodes)
5296

    
5297
  def ExpandNames(self):
5298
    if self.lock_all:
5299
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5300
    else:
5301
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5302

    
5303
    if self.lock_instances:
5304
      self.needed_locks[locking.LEVEL_INSTANCE] = \
5305
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5306

    
5307
  def BuildHooksEnv(self):
5308
    """Build hooks env.
5309

5310
    This runs on the master node.
5311

5312
    """
5313
    return {
5314
      "OP_TARGET": self.op.node_name,
5315
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5316
      "OFFLINE": str(self.op.offline),
5317
      "DRAINED": str(self.op.drained),
5318
      "MASTER_CAPABLE": str(self.op.master_capable),
5319
      "VM_CAPABLE": str(self.op.vm_capable),
5320
      }
5321

    
5322
  def BuildHooksNodes(self):
5323
    """Build hooks nodes.
5324

5325
    """
5326
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5327
    return (nl, nl)
5328

    
5329
  def CheckPrereq(self):
5330
    """Check prerequisites.
5331

5332
    This only checks the instance list against the existing names.
5333

5334
    """
5335
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5336

    
5337
    if self.lock_instances:
5338
      affected_instances = \
5339
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5340

    
5341
      # Verify instance locks
5342
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5343
      wanted_instances = frozenset(affected_instances.keys())
5344
      if wanted_instances - owned_instances:
5345
        raise errors.OpPrereqError("Instances affected by changing node %s's"
5346
                                   " secondary IP address have changed since"
5347
                                   " locks were acquired, wanted '%s', have"
5348
                                   " '%s'; retry the operation" %
5349
                                   (self.op.node_name,
5350
                                    utils.CommaJoin(wanted_instances),
5351
                                    utils.CommaJoin(owned_instances)),
5352
                                   errors.ECODE_STATE)
5353
    else:
5354
      affected_instances = None
5355

    
5356
    if (self.op.master_candidate is not None or
5357
        self.op.drained is not None or
5358
        self.op.offline is not None):
5359
      # we can't change the master's node flags
5360
      if self.op.node_name == self.cfg.GetMasterNode():
5361
        raise errors.OpPrereqError("The master role can be changed"
5362
                                   " only via master-failover",
5363
                                   errors.ECODE_INVAL)
5364

    
5365
    if self.op.master_candidate and not node.master_capable:
5366
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5367
                                 " it a master candidate" % node.name,
5368
                                 errors.ECODE_STATE)
5369

    
5370
    if self.op.vm_capable == False:
5371
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5372
      if ipri or isec:
5373
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5374
                                   " the vm_capable flag" % node.name,
5375
                                   errors.ECODE_STATE)
5376

    
5377
    if node.master_candidate and self.might_demote and not self.lock_all:
5378
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5379
      # check if after removing the current node, we're missing master
5380
      # candidates
5381
      (mc_remaining, mc_should, _) = \
5382
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5383
      if mc_remaining < mc_should:
5384
        raise errors.OpPrereqError("Not enough master candidates, please"
5385
                                   " pass auto promote option to allow"
5386
                                   " promotion", errors.ECODE_STATE)
5387

    
5388
    self.old_flags = old_flags = (node.master_candidate,
5389
                                  node.drained, node.offline)
5390
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5391
    self.old_role = old_role = self._F2R[old_flags]
5392

    
5393
    # Check for ineffective changes
5394
    for attr in self._FLAGS:
5395
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5396
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5397
        setattr(self.op, attr, None)
5398

    
5399
    # Past this point, any flag change to False means a transition
5400
    # away from the respective state, as only real changes are kept
5401

    
5402
    # TODO: We might query the real power state if it supports OOB
5403
    if _SupportsOob(self.cfg, node):
5404
      if self.op.offline is False and not (node.powered or
5405
                                           self.op.powered == True):
5406
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5407
                                    " offline status can be reset") %
5408
                                   self.op.node_name)
5409
    elif self.op.powered is not None:
5410
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5411
                                  " as it does not support out-of-band"
5412
                                  " handling") % self.op.node_name)
5413

    
5414
    # If we're being de-offlined or un-drained, we'll promote ourselves
    # to master candidate if needed
5415
    if (self.op.drained == False or self.op.offline == False or
5416
        (self.op.master_capable and not node.master_capable)):
5417
      if _DecideSelfPromotion(self):
5418
        self.op.master_candidate = True
5419
        self.LogInfo("Auto-promoting node to master candidate")
5420

    
5421
    # If we're no longer master capable, we'll demote ourselves from MC
5422
    if self.op.master_capable == False and node.master_candidate:
5423
      self.LogInfo("Demoting from master candidate")
5424
      self.op.master_candidate = False
5425

    
5426
    # Compute new role
5427
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5428
    if self.op.master_candidate:
5429
      new_role = self._ROLE_CANDIDATE
5430
    elif self.op.drained:
5431
      new_role = self._ROLE_DRAINED
5432
    elif self.op.offline:
5433
      new_role = self._ROLE_OFFLINE
5434
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5435
      # False is still in new flags, which means we're un-setting (the
5436
      # only) True flag
5437
      new_role = self._ROLE_REGULAR
5438
    else: # no new flags, nothing, keep old role
5439
      new_role = old_role
5440

    
5441
    self.new_role = new_role
5442

    
5443
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5444
      # Trying to transition out of offline status
5445
      # TODO: Use standard RPC runner, but make sure it works when the node is
5446
      # still marked offline
5447
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5448
      if result.fail_msg:
5449
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5450
                                   " to report its version: %s" %
5451
                                   (node.name, result.fail_msg),
5452
                                   errors.ECODE_STATE)
5453
      else:
5454
        self.LogWarning("Transitioning node from offline to online state"
5455
                        " without using re-add. Please make sure the node"
5456
                        " is healthy!")
5457

    
5458
    if self.op.secondary_ip:
5459
      # Ok even without locking, because this can't be changed by any LU
5460
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5461
      master_singlehomed = master.secondary_ip == master.primary_ip
5462
      if master_singlehomed and self.op.secondary_ip:
5463
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5464
                                   " homed cluster", errors.ECODE_INVAL)
5465

    
5466
      assert not (frozenset(affected_instances) -
5467
                  self.owned_locks(locking.LEVEL_INSTANCE))
5468

    
5469
      if node.offline:
5470
        if affected_instances:
5471
          raise errors.OpPrereqError("Cannot change secondary IP address:"
5472
                                     " offline node has instances (%s)"
5473
                                     " configured to use it" %
5474
                                     utils.CommaJoin(affected_instances.keys()))
5475
      else:
5476
        # On online nodes, check that no instances are running, and that
5477
        # the node has the new ip and we can reach it.
5478
        for instance in affected_instances.values():
5479
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5480

    
5481
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5482
        if master.name != node.name:
5483
          # check reachability from master secondary ip to new secondary ip
5484
          if not netutils.TcpPing(self.op.secondary_ip,
5485
                                  constants.DEFAULT_NODED_PORT,
5486
                                  source=master.secondary_ip):
5487
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5488
                                       " based ping to node daemon port",
5489
                                       errors.ECODE_ENVIRON)
5490

    
5491
    if self.op.ndparams:
5492
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5493
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5494
      self.new_ndparams = new_ndparams
5495

    
5496
  def Exec(self, feedback_fn):
5497
    """Modifies a node.
5498

5499
    """
5500
    node = self.node
5501
    old_role = self.old_role
5502
    new_role = self.new_role
5503

    
5504
    result = []
5505

    
5506
    if self.op.ndparams:
5507
      node.ndparams = self.new_ndparams
5508

    
5509
    if self.op.powered is not None:
5510
      node.powered = self.op.powered
5511

    
5512
    for attr in ["master_capable", "vm_capable"]:
5513
      val = getattr(self.op, attr)
5514
      if val is not None:
5515
        setattr(node, attr, val)
5516
        result.append((attr, str(val)))
5517

    
5518
    if new_role != old_role:
5519
      # Tell the node to demote itself, if no longer MC and not offline
5520
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5521
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5522
        if msg:
5523
          self.LogWarning("Node failed to demote itself: %s", msg)
5524

    
5525
      new_flags = self._R2F[new_role]
5526
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5527
        if of != nf:
5528
          result.append((desc, str(nf)))
5529
      (node.master_candidate, node.drained, node.offline) = new_flags
5530

    
5531
      # we locked all nodes, so adjust the candidate pool before updating
      # this node
5532
      if self.lock_all:
5533
        _AdjustCandidatePool(self, [node.name])
5534

    
5535
    if self.op.secondary_ip:
5536
      node.secondary_ip = self.op.secondary_ip
5537
      result.append(("secondary_ip", self.op.secondary_ip))
5538

    
5539
    # this will trigger configuration file update, if needed
5540
    self.cfg.Update(node, feedback_fn)
5541

    
5542
    # this will trigger job queue propagation or cleanup if the mc
5543
    # flag changed
5544
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5545
      self.context.ReaddNode(node)
5546

    
5547
    return result
5548

    
5549
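# A minimal illustrative sketch (not wired into any LU): the flag-to-role
# mapping used by LUNodeSetParams above. At most one of the three flags may
# be set; with none set the node has the regular role. The helper name is
# hypothetical, the mapping is the class' own _F2R table.
def _ExampleNodeRole(master_candidate, drained, offline):
  """Returns the LUNodeSetParams role constant for a flag combination.

  """
  # pylint: disable=W0212
  return LUNodeSetParams._F2R[(master_candidate, drained, offline)]

# For example, _ExampleNodeRole(False, True, False) yields
# LUNodeSetParams._ROLE_DRAINED.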

    
5550
class LUNodePowercycle(NoHooksLU):
5551
  """Powercycles a node.
5552

5553
  """
5554
  REQ_BGL = False
5555

    
5556
  def CheckArguments(self):
5557
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5558
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5559
      raise errors.OpPrereqError("The node is the master and the force"
5560
                                 " parameter was not set",
5561
                                 errors.ECODE_INVAL)
5562

    
5563
  def ExpandNames(self):
5564
    """Locking for PowercycleNode.
5565

5566
    This is a last-resort option and shouldn't block on other
5567
    jobs. Therefore, we grab no locks.
5568

5569
    """
5570
    self.needed_locks = {}
5571

    
5572
  def Exec(self, feedback_fn):
5573
    """Reboots a node.
5574

5575
    """
5576
    result = self.rpc.call_node_powercycle(self.op.node_name,
5577
                                           self.cfg.GetHypervisorType())
5578
    result.Raise("Failed to schedule the reboot")
5579
    return result.payload
5580

    
5581

    
5582
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result

class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return the values of the requested cluster config fields.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values

class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info

def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a tuple of (disks_ok, device_info); disks_ok is False if the
      operation failed on any disk, and device_info is a list of
      (host, instance_visible_name, node_visible_device_path) tuples
      with the mapping between instance devices and node devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before the handshake occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info

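# Editor's note on _AssembleInstanceDisks above (illustrative, not part of the
# original module): each entry appended to device_info has the shape
#   ("node1.example.com", "disk/0", "/dev/drbd0")
# i.e. (primary node, instance-visible disk name, node-visible device path);
# the node name and device path here are made-up example values, and the
# device path is None when the primary-side assembly failed.
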
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")

class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)

def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is not running before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)

def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks

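# Editor's summary of _ShutdownInstanceDisks below (added comment, derived
# from the code): the return value is True only if every shutdown call
# succeeded, with two exceptions -- failures on the primary node are
# tolerated when ignore_primary is set, and failures reported by offline
# secondary nodes are always tolerated.
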
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Errors on the primary node are ignored only if C{ignore_primary} is
  true; errors reported by offline secondary nodes are always ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result

def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)

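# Editor's note (illustrative values, not from the original source): the
# req_sizes argument of _CheckNodesFreeDiskPerVG below maps volume group
# names to the space required in MiB, e.g. {"xenvg": 10240, "datavg": 2048};
# each entry is checked separately via _CheckNodesFreeDiskOnVG.
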
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk space. In case any node has less disk space or we cannot
  get the information from the node, this function raises an
  OpPrereqError exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the dict mapping each volume group to the amount of
      disk space in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)

def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk space. In case any node has less disk space or we cannot
  get the information from the node, this function raises an
  OpPrereqError exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)

def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs.

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has fewer CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    num_cpus = info.payload.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)

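# Editor's note on LUInstanceStartup below (added comment): the hvparams and
# beparams carried by the opcode are validated and then passed straight to
# the node daemon for this start only; nothing in this LU writes them back
# to the cluster configuration.
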
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = \
        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                      self.op.beparams),
                                     self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)

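# Editor's summary of LUInstanceReboot below (added comment, derived from the
# code): soft and hard reboots of a running instance are delegated to a
# single call_instance_reboot RPC, while a full reboot (or rebooting an
# instance that is not running) is performed as an explicit shutdown, disk
# deactivation, disk activation and fresh instance start.
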
class LUInstanceReboot(LogicalUnit):
6170
  """Reboot an instance.
6171

6172
  """
6173
  HPATH = "instance-reboot"
6174
  HTYPE = constants.HTYPE_INSTANCE
6175
  REQ_BGL = False
6176

    
6177
  def ExpandNames(self):
6178
    self._ExpandAndLockInstance()
6179

    
6180
  def BuildHooksEnv(self):
6181
    """Build hooks env.
6182

6183
    This runs on master, primary and secondary nodes of the instance.
6184

6185
    """
6186
    env = {
6187
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6188
      "REBOOT_TYPE": self.op.reboot_type,
6189
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6190
      }
6191

    
6192
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6193

    
6194
    return env
6195

    
6196
  def BuildHooksNodes(self):
6197
    """Build hooks nodes.
6198

6199
    """
6200
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6201
    return (nl, nl)
6202

    
6203
  def CheckPrereq(self):
6204
    """Check prerequisites.
6205

6206
    This checks that the instance is in the cluster.
6207

6208
    """
6209
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6210
    assert self.instance is not None, \
6211
      "Cannot retrieve locked instance %s" % self.op.instance_name
6212

    
6213
    _CheckNodeOnline(self, instance.primary_node)
6214

    
6215
    # check bridges existence
6216
    _CheckInstanceBridgesExist(self, instance)
6217

    
6218
  def Exec(self, feedback_fn):
6219
    """Reboot the instance.
6220

6221
    """
6222
    instance = self.instance
6223
    ignore_secondaries = self.op.ignore_secondaries
6224
    reboot_type = self.op.reboot_type
6225

    
6226
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6227
                                              instance.name,
6228
                                              instance.hypervisor)
6229
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6230
    instance_running = bool(remote_info.payload)
6231

    
6232
    node_current = instance.primary_node
6233

    
6234
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6235
                                            constants.INSTANCE_REBOOT_HARD]:
6236
      for disk in instance.disks:
6237
        self.cfg.SetDiskID(disk, node_current)
6238
      result = self.rpc.call_instance_reboot(node_current, instance,
6239
                                             reboot_type,
6240
                                             self.op.shutdown_timeout)
6241
      result.Raise("Could not reboot instance")
6242
    else:
6243
      if instance_running:
6244
        result = self.rpc.call_instance_shutdown(node_current, instance,
6245
                                                 self.op.shutdown_timeout)
6246
        result.Raise("Could not shutdown instance for full reboot")
6247
        _ShutdownInstanceDisks(self, instance)
6248
      else:
6249
        self.LogInfo("Instance %s was already stopped, starting now",
6250
                     instance.name)
6251
      _StartInstanceDisks(self, instance, ignore_secondaries)
6252
      result = self.rpc.call_instance_start(node_current,
6253
                                            (instance, None, None), False)
6254
      msg = result.fail_msg
6255
      if msg:
6256
        _ShutdownInstanceDisks(self, instance)
6257
        raise errors.OpExecError("Could not start instance for"
6258
                                 " full reboot: %s" % msg)
6259

    
6260
    self.cfg.MarkInstanceUp(instance.name)
6261

    
6262

    
6263
class LUInstanceShutdown(LogicalUnit):
6264
  """Shutdown an instance.
6265

6266
  """
6267
  HPATH = "instance-stop"
6268
  HTYPE = constants.HTYPE_INSTANCE
6269
  REQ_BGL = False
6270

    
6271
  def ExpandNames(self):
6272
    self._ExpandAndLockInstance()
6273

    
6274
  def BuildHooksEnv(self):
6275
    """Build hooks env.
6276

6277
    This runs on master, primary and secondary nodes of the instance.
6278

6279
    """
6280
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6281
    env["TIMEOUT"] = self.op.timeout
6282
    return env
6283

    
6284
  def BuildHooksNodes(self):
6285
    """Build hooks nodes.
6286

6287
    """
6288
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6289
    return (nl, nl)
6290

    
6291
  def CheckPrereq(self):
6292
    """Check prerequisites.
6293

6294
    This checks that the instance is in the cluster.
6295

6296
    """
6297
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6298
    assert self.instance is not None, \
6299
      "Cannot retrieve locked instance %s" % self.op.instance_name
6300

    
6301
    self.primary_offline = \
6302
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6303

    
6304
    if self.primary_offline and self.op.ignore_offline_nodes:
6305
      self.proc.LogWarning("Ignoring offline primary node")
6306
    else:
6307
      _CheckNodeOnline(self, self.instance.primary_node)
6308

    
6309
  def Exec(self, feedback_fn):
6310
    """Shutdown the instance.
6311

6312
    """
6313
    instance = self.instance
6314
    node_current = instance.primary_node
6315
    timeout = self.op.timeout
6316

    
6317
    if not self.op.no_remember:
6318
      self.cfg.MarkInstanceDown(instance.name)
6319

    
6320
    if self.primary_offline:
6321
      assert self.op.ignore_offline_nodes
6322
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6323
    else:
6324
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6325
      msg = result.fail_msg
6326
      if msg:
6327
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6328

    
6329
      _ShutdownInstanceDisks(self, instance)
6330

    
6331

    
6332
class LUInstanceReinstall(LogicalUnit):
6333
  """Reinstall an instance.
6334

6335
  """
6336
  HPATH = "instance-reinstall"
6337
  HTYPE = constants.HTYPE_INSTANCE
6338
  REQ_BGL = False
6339

    
6340
  def ExpandNames(self):
6341
    self._ExpandAndLockInstance()
6342

    
6343
  def BuildHooksEnv(self):
6344
    """Build hooks env.
6345

6346
    This runs on master, primary and secondary nodes of the instance.
6347

6348
    """
6349
    return _BuildInstanceHookEnvByObject(self, self.instance)
6350

    
6351
  def BuildHooksNodes(self):
6352
    """Build hooks nodes.
6353

6354
    """
6355
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6356
    return (nl, nl)
6357

    
6358
  def CheckPrereq(self):
6359
    """Check prerequisites.
6360

6361
    This checks that the instance is in the cluster and is not running.
6362

6363
    """
6364
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6365
    assert instance is not None, \
6366
      "Cannot retrieve locked instance %s" % self.op.instance_name
6367
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6368
                     " offline, cannot reinstall")
6369
    for node in instance.secondary_nodes:
6370
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6371
                       " cannot reinstall")
6372

    
6373
    if instance.disk_template == constants.DT_DISKLESS:
6374
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6375
                                 self.op.instance_name,
6376
                                 errors.ECODE_INVAL)
6377
    _CheckInstanceDown(self, instance, "cannot reinstall")
6378

    
6379
    if self.op.os_type is not None:
6380
      # OS verification
6381
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6382
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6383
      instance_os = self.op.os_type
6384
    else:
6385
      instance_os = instance.os
6386

    
6387
    nodelist = list(instance.all_nodes)
6388

    
6389
    if self.op.osparams:
6390
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6391
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6392
      self.os_inst = i_osdict # the new dict (without defaults)
6393
    else:
6394
      self.os_inst = None
6395

    
6396
    self.instance = instance
6397

    
6398
  def Exec(self, feedback_fn):
6399
    """Reinstall the instance.
6400

6401
    """
6402
    inst = self.instance
6403

    
6404
    if self.op.os_type is not None:
6405
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6406
      inst.os = self.op.os_type
6407
      # Write to configuration
6408
      self.cfg.Update(inst, feedback_fn)
6409

    
6410
    _StartInstanceDisks(self, inst, None)
6411
    try:
6412
      feedback_fn("Running the instance OS create scripts...")
6413
      # FIXME: pass debug option from opcode to backend
6414
      result = self.rpc.call_instance_os_add(inst.primary_node,
6415
                                             (inst, self.os_inst), True,
6416
                                             self.op.debug_level)
6417
      result.Raise("Could not install OS for instance %s on node %s" %
6418
                   (inst.name, inst.primary_node))
6419
    finally:
6420
      _ShutdownInstanceDisks(self, inst)
6421

    
6422

    
6423
class LUInstanceRecreateDisks(LogicalUnit):
6424
  """Recreate an instance's missing disks.
6425

6426
  """
6427
  HPATH = "instance-recreate-disks"
6428
  HTYPE = constants.HTYPE_INSTANCE
6429
  REQ_BGL = False
6430

    
6431
  def CheckArguments(self):
6432
    # normalise the disk list
6433
    self.op.disks = sorted(frozenset(self.op.disks))
6434

    
6435
  def ExpandNames(self):
6436
    self._ExpandAndLockInstance()
6437
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6438
    if self.op.nodes:
6439
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6440
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6441
    else:
6442
      self.needed_locks[locking.LEVEL_NODE] = []
6443

    
6444
  def DeclareLocks(self, level):
6445
    if level == locking.LEVEL_NODE:
6446
      # if we replace the nodes, we only need to lock the old primary,
6447
      # otherwise we need to lock all nodes for disk re-creation
6448
      primary_only = bool(self.op.nodes)
6449
      self._LockInstancesNodes(primary_only=primary_only)
6450

    
6451
  def BuildHooksEnv(self):
6452
    """Build hooks env.
6453

6454
    This runs on master, primary and secondary nodes of the instance.
6455

6456
    """
6457
    return _BuildInstanceHookEnvByObject(self, self.instance)
6458

    
6459
  def BuildHooksNodes(self):
6460
    """Build hooks nodes.
6461

6462
    """
6463
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6464
    return (nl, nl)
6465

    
6466
  def CheckPrereq(self):
6467
    """Check prerequisites.
6468

6469
    This checks that the instance is in the cluster and is not running.
6470

6471
    """
6472
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6473
    assert instance is not None, \
6474
      "Cannot retrieve locked instance %s" % self.op.instance_name
6475
    if self.op.nodes:
6476
      if len(self.op.nodes) != len(instance.all_nodes):
6477
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6478
                                   " %d replacement nodes were specified" %
6479
                                   (instance.name, len(instance.all_nodes),
6480
                                    len(self.op.nodes)),
6481
                                   errors.ECODE_INVAL)
6482
      assert instance.disk_template != constants.DT_DRBD8 or \
6483
          len(self.op.nodes) == 2
6484
      assert instance.disk_template != constants.DT_PLAIN or \
6485
          len(self.op.nodes) == 1
6486
      primary_node = self.op.nodes[0]
6487
    else:
6488
      primary_node = instance.primary_node
6489
    _CheckNodeOnline(self, primary_node)
6490

    
6491
    if instance.disk_template == constants.DT_DISKLESS:
6492
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6493
                                 self.op.instance_name, errors.ECODE_INVAL)
6494
    # if we replace nodes *and* the old primary is offline, we don't
6495
    # check
6496
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6497
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6498
    if not (self.op.nodes and old_pnode.offline):
6499
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6500

    
6501
    if not self.op.disks:
6502
      self.op.disks = range(len(instance.disks))
6503
    else:
6504
      for idx in self.op.disks:
6505
        if idx >= len(instance.disks):
6506
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6507
                                     errors.ECODE_INVAL)
6508
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6509
      raise errors.OpPrereqError("Can't recreate disks partially and"
6510
                                 " change the nodes at the same time",
6511
                                 errors.ECODE_INVAL)
6512
    self.instance = instance
6513

    
6514
  def Exec(self, feedback_fn):
6515
    """Recreate the disks.
6516

6517
    """
6518
    instance = self.instance
6519

    
6520
    to_skip = []
6521
    mods = [] # keeps track of needed logical_id changes
6522

    
6523
    for idx, disk in enumerate(instance.disks):
6524
      if idx not in self.op.disks: # disk idx has not been passed in
6525
        to_skip.append(idx)
6526
        continue
6527
      # update secondaries for disks, if needed
6528
      if self.op.nodes:
6529
        if disk.dev_type == constants.LD_DRBD8:
6530
          # need to update the nodes and minors
6531
          assert len(self.op.nodes) == 2
6532
          assert len(disk.logical_id) == 6 # otherwise disk internals
6533
                                           # have changed
6534
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6535
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6536
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6537
                    new_minors[0], new_minors[1], old_secret)
6538
          assert len(disk.logical_id) == len(new_id)
6539
          mods.append((idx, new_id))
6540

    
6541
    # now that we have passed all asserts above, we can apply the mods
6542
    # in a single run (to avoid partial changes)
6543
    for idx, new_id in mods:
6544
      instance.disks[idx].logical_id = new_id
6545

    
6546
    # change primary node, if needed
6547
    if self.op.nodes:
6548
      instance.primary_node = self.op.nodes[0]
6549
      self.LogWarning("Changing the instance's nodes, you will have to"
6550
                      " remove any disks left on the older nodes manually")
6551

    
6552
    if self.op.nodes:
6553
      self.cfg.Update(instance, feedback_fn)
6554

    
6555
    _CreateDisks(self, instance, to_skip=to_skip)
6556

    
6557

    
6558
class LUInstanceRename(LogicalUnit):
6559
  """Rename an instance.
6560

6561
  """
6562
  HPATH = "instance-rename"
6563
  HTYPE = constants.HTYPE_INSTANCE
6564

    
6565
  def CheckArguments(self):
6566
    """Check arguments.
6567

6568
    """
6569
    if self.op.ip_check and not self.op.name_check:
6570
      # TODO: make the ip check more flexible and not depend on the name check
6571
      raise errors.OpPrereqError("IP address check requires a name check",
6572
                                 errors.ECODE_INVAL)
6573

    
6574
  def BuildHooksEnv(self):
6575
    """Build hooks env.
6576

6577
    This runs on master, primary and secondary nodes of the instance.
6578

6579
    """
6580
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6581
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6582
    return env
6583

    
6584
  def BuildHooksNodes(self):
6585
    """Build hooks nodes.
6586

6587
    """
6588
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6589
    return (nl, nl)
6590

    
6591
  def CheckPrereq(self):
6592
    """Check prerequisites.
6593

6594
    This checks that the instance is in the cluster and is not running.
6595

6596
    """
6597
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6598
                                                self.op.instance_name)
6599
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6600
    assert instance is not None
6601
    _CheckNodeOnline(self, instance.primary_node)
6602
    _CheckInstanceDown(self, instance, "cannot rename")
6603
    self.instance = instance
6604

    
6605
    new_name = self.op.new_name
6606
    if self.op.name_check:
6607
      hostname = netutils.GetHostname(name=new_name)
6608
      if hostname != new_name:
6609
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6610
                     hostname.name)
6611
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6612
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6613
                                    " same as given hostname '%s'") %
6614
                                    (hostname.name, self.op.new_name),
6615
                                    errors.ECODE_INVAL)
6616
      new_name = self.op.new_name = hostname.name
6617
      if (self.op.ip_check and
6618
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6619
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6620
                                   (hostname.ip, new_name),
6621
                                   errors.ECODE_NOTUNIQUE)
6622

    
6623
    instance_list = self.cfg.GetInstanceList()
6624
    if new_name in instance_list and new_name != instance.name:
6625
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6626
                                 new_name, errors.ECODE_EXISTS)
6627

    
6628
  def Exec(self, feedback_fn):
6629
    """Rename the instance.
6630

6631
    """
6632
    inst = self.instance
6633
    old_name = inst.name
6634

    
6635
    rename_file_storage = False
6636
    if (inst.disk_template in constants.DTS_FILEBASED and
6637
        self.op.new_name != inst.name):
6638
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6639
      rename_file_storage = True
6640

    
6641
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6642
    # Change the instance lock. This is definitely safe while we hold the BGL.
6643
    # Otherwise the new lock would have to be added in acquired mode.
6644
    assert self.REQ_BGL
6645
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6646
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6647

    
6648
    # re-read the instance from the configuration after rename
6649
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6650

    
6651
    if rename_file_storage:
6652
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6653
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6654
                                                     old_file_storage_dir,
6655
                                                     new_file_storage_dir)
6656
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6657
                   " (but the instance has been renamed in Ganeti)" %
6658
                   (inst.primary_node, old_file_storage_dir,
6659
                    new_file_storage_dir))
6660

    
6661
    _StartInstanceDisks(self, inst, None)
6662
    try:
6663
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6664
                                                 old_name, self.op.debug_level)
6665
      msg = result.fail_msg
6666
      if msg:
6667
        msg = ("Could not run OS rename script for instance %s on node %s"
6668
               " (but the instance has been renamed in Ganeti): %s" %
6669
               (inst.name, inst.primary_node, msg))
6670
        self.proc.LogWarning(msg)
6671
    finally:
6672
      _ShutdownInstanceDisks(self, inst)
6673

    
6674
    return inst.name
6675

    
6676

    
6677
class LUInstanceRemove(LogicalUnit):
6678
  """Remove an instance.
6679

6680
  """
6681
  HPATH = "instance-remove"
6682
  HTYPE = constants.HTYPE_INSTANCE
6683
  REQ_BGL = False
6684

    
6685
  def ExpandNames(self):
6686
    self._ExpandAndLockInstance()
6687
    self.needed_locks[locking.LEVEL_NODE] = []
6688
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6689

    
6690
  def DeclareLocks(self, level):
6691
    if level == locking.LEVEL_NODE:
6692
      self._LockInstancesNodes()
6693

    
6694
  def BuildHooksEnv(self):
6695
    """Build hooks env.
6696

6697
    This runs on master, primary and secondary nodes of the instance.
6698

6699
    """
6700
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6701
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6702
    return env
6703

    
6704
  def BuildHooksNodes(self):
6705
    """Build hooks nodes.
6706

6707
    """
6708
    nl = [self.cfg.GetMasterNode()]
6709
    nl_post = list(self.instance.all_nodes) + nl
6710
    return (nl, nl_post)
6711

    
6712
  def CheckPrereq(self):
6713
    """Check prerequisites.
6714

6715
    This checks that the instance is in the cluster.
6716

6717
    """
6718
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6719
    assert self.instance is not None, \
6720
      "Cannot retrieve locked instance %s" % self.op.instance_name
6721

    
6722
  def Exec(self, feedback_fn):
6723
    """Remove the instance.
6724

6725
    """
6726
    instance = self.instance
6727
    logging.info("Shutting down instance %s on node %s",
6728
                 instance.name, instance.primary_node)
6729

    
6730
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6731
                                             self.op.shutdown_timeout)
6732
    msg = result.fail_msg
6733
    if msg:
6734
      if self.op.ignore_failures:
6735
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6736
      else:
6737
        raise errors.OpExecError("Could not shutdown instance %s on"
6738
                                 " node %s: %s" %
6739
                                 (instance.name, instance.primary_node, msg))
6740

    
6741
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6742

    
6743

    
6744
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6745
  """Utility function to remove an instance.
6746

6747
  """
6748
  logging.info("Removing block devices for instance %s", instance.name)
6749

    
6750
  if not _RemoveDisks(lu, instance):
6751
    if not ignore_failures:
6752
      raise errors.OpExecError("Can't remove instance's disks")
6753
    feedback_fn("Warning: can't remove instance's disks")
6754

    
6755
  logging.info("Removing instance %s out of cluster config", instance.name)
6756

    
6757
  lu.cfg.RemoveInstance(instance.name)
6758

    
6759
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6760
    "Instance lock removal conflict"
6761

    
6762
  # Remove lock for the instance
6763
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6764

    
6765

    
6766
class LUInstanceQuery(NoHooksLU):
6767
  """Logical unit for querying instances.
6768

6769
  """
6770
  # pylint: disable=W0142
6771
  REQ_BGL = False
6772

    
6773
  def CheckArguments(self):
6774
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6775
                             self.op.output_fields, self.op.use_locking)
6776

    
6777
  def ExpandNames(self):
6778
    self.iq.ExpandNames(self)
6779

    
6780
  def DeclareLocks(self, level):
6781
    self.iq.DeclareLocks(self, level)
6782

    
6783
  def Exec(self, feedback_fn):
6784
    return self.iq.OldStyleQuery(self)
6785

    
6786

    
6787
class LUInstanceFailover(LogicalUnit):
6788
  """Failover an instance.
6789

6790
  """
6791
  HPATH = "instance-failover"
6792
  HTYPE = constants.HTYPE_INSTANCE
6793
  REQ_BGL = False
6794

    
6795
  def CheckArguments(self):
6796
    """Check the arguments.
6797

6798
    """
6799
    self.iallocator = getattr(self.op, "iallocator", None)
6800
    self.target_node = getattr(self.op, "target_node", None)
6801

    
6802
  def ExpandNames(self):
6803
    self._ExpandAndLockInstance()
6804

    
6805
    if self.op.target_node is not None:
6806
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6807

    
6808
    self.needed_locks[locking.LEVEL_NODE] = []
6809
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6810

    
6811
    ignore_consistency = self.op.ignore_consistency
6812
    shutdown_timeout = self.op.shutdown_timeout
6813
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6814
                                       cleanup=False,
6815
                                       failover=True,
6816
                                       ignore_consistency=ignore_consistency,
6817
                                       shutdown_timeout=shutdown_timeout)
6818
    self.tasklets = [self._migrater]
6819

    
6820
  def DeclareLocks(self, level):
6821
    if level == locking.LEVEL_NODE:
6822
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6823
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6824
        if self.op.target_node is None:
6825
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6826
        else:
6827
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6828
                                                   self.op.target_node]
6829
        del self.recalculate_locks[locking.LEVEL_NODE]
6830
      else:
6831
        self._LockInstancesNodes()
6832

    
6833
  def BuildHooksEnv(self):
6834
    """Build hooks env.
6835

6836
    This runs on master, primary and secondary nodes of the instance.
6837

6838
    """
6839
    instance = self._migrater.instance
6840
    source_node = instance.primary_node
6841
    target_node = self.op.target_node
6842
    env = {
6843
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6844
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6845
      "OLD_PRIMARY": source_node,
6846
      "NEW_PRIMARY": target_node,
6847
      }
6848

    
6849
    if instance.disk_template in constants.DTS_INT_MIRROR:
6850
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6851
      env["NEW_SECONDARY"] = source_node
6852
    else:
6853
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6854

    
6855
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6856

    
6857
    return env
6858

    
6859
  def BuildHooksNodes(self):
6860
    """Build hooks nodes.
6861

6862
    """
6863
    instance = self._migrater.instance
6864
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6865
    return (nl, nl + [instance.primary_node])
6866

    
6867

    
6868
class LUInstanceMigrate(LogicalUnit):
6869
  """Migrate an instance.
6870

6871
  This is migration without shutting down, compared to the failover,
6872
  which is done with shutdown.
6873

6874
  """
6875
  HPATH = "instance-migrate"
6876
  HTYPE = constants.HTYPE_INSTANCE
6877
  REQ_BGL = False
6878

    
6879
  def ExpandNames(self):
6880
    self._ExpandAndLockInstance()
6881

    
6882
    if self.op.target_node is not None:
6883
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6884

    
6885
    self.needed_locks[locking.LEVEL_NODE] = []
6886
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6887

    
6888
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6889
                                       cleanup=self.op.cleanup,
6890
                                       failover=False,
6891
                                       fallback=self.op.allow_failover)
6892
    self.tasklets = [self._migrater]
6893

    
6894
  def DeclareLocks(self, level):
6895
    if level == locking.LEVEL_NODE:
6896
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6897
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6898
        if self.op.target_node is None:
6899
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6900
        else:
6901
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6902
                                                   self.op.target_node]
6903
        del self.recalculate_locks[locking.LEVEL_NODE]
6904
      else:
6905
        self._LockInstancesNodes()
6906

    
6907
  def BuildHooksEnv(self):
6908
    """Build hooks env.
6909

6910
    This runs on master, primary and secondary nodes of the instance.
6911

6912
    """
6913
    instance = self._migrater.instance
6914
    source_node = instance.primary_node
6915
    target_node = self.op.target_node
6916
    env = _BuildInstanceHookEnvByObject(self, instance)
6917
    env.update({
6918
      "MIGRATE_LIVE": self._migrater.live,
6919
      "MIGRATE_CLEANUP": self.op.cleanup,
6920
      "OLD_PRIMARY": source_node,
6921
      "NEW_PRIMARY": target_node,
6922
      })
6923

    
6924
    if instance.disk_template in constants.DTS_INT_MIRROR:
6925
      env["OLD_SECONDARY"] = target_node
6926
      env["NEW_SECONDARY"] = source_node
6927
    else:
6928
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6929

    
6930
    return env
6931

    
6932
  def BuildHooksNodes(self):
6933
    """Build hooks nodes.
6934

6935
    """
6936
    instance = self._migrater.instance
6937
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6938
    return (nl, nl + [instance.primary_node])
6939

    
6940

    
6941
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
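    # What the loop below does: each newly created disk is assembled on the
    # target node to obtain a device path, and the data is then streamed from
    # the source node into that path via blockdev_export; the first failure
    # aborts the copy and is handled by the error path further down.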
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

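  # Exec() does not migrate anything itself: it builds one single-opcode job
  # per primary instance of the node and returns them via ResultWithJobs, so
  # the actual migrations run as separate jobs scheduled by the master.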
  def Exec(self, feedback_fn):
    # Prepare jobs for migrating the instances
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
      ]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)


class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we are cleaning up after a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout of the shutdown

  """

  # Constants
  _MIGRATION_POLL_INTERVAL = 1      # seconds
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds

  def __init__(self, lu, instance_name, cleanup=False,
               failover=False, fallback=False,
               ignore_consistency=False,
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance

    if (not self.cleanup and not instance.admin_up and not self.failover and
        self.fallback):
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
                      " to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

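    # Externally mirrored disk templates can move to an arbitrary node, so a
    # target is picked via the iallocator or given explicitly; internally
    # mirrored (DRBD) instances can only go to their existing secondary,
    # which is handled in the else branch below.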
    if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_STATE)

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])

    else:
      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    # check memory requirements on the target node
    if not self.failover or instance.admin_up:
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                           instance.name, i_be[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

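    # Resolve the migration mode: the legacy boolean 'live' parameter and the
    # newer 'mode' parameter are mutually exclusive; 'live' is translated into
    # the corresponding mode and cleared, and if neither is given the
    # hypervisor's HV_MIGRATION_MODE default is used.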
    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
                                                skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.instance_name,
                     # TODO See why hail breaks with a single node below
                     relocate_from=[self.instance.primary_node,
                                    self.instance.primary_node],
                     )

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.lu.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                    self.instance_name, self.lu.op.iallocator,
                    utils.CommaJoin(ial.result))

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

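  # The two helpers below drive the DRBD network state of all instance disks
  # via RPC: _GoStandalone disconnects the peers entirely, while _GoReconnect
  # re-establishes the link in either single-master or dual-master
  # (multimaster) mode, as required around a live migration.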
  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

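    # Whichever node is not running the instance is demoted to a plain DRBD
    # secondary below; if the instance ended up on the target node, the
    # configuration is updated first so the new primary node is recorded.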
    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore errors here, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                                 instance,
                                                                 migration_info,
                                                                 False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
        instance, False, self.live)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on source node %s: %s",
                    source_node, abort_msg)

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, self.instance.hypervisor)
    src_info = nodeinfo[source_node]
    dst_info = nodeinfo[target_node]

    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

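    # Poll the source hypervisor for the migration status every
    # _MIGRATION_POLL_INTERVAL seconds, emitting a progress line at most every
    # _MIGRATION_FEEDBACK_INTERVAL seconds, until the status either leaves
    # HV_MIGRATION_ACTIVE or reports a failure.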
    self.feedback_fn("* starting memory transfer")
7647
    last_feedback = time.time()
7648
    while True:
7649
      result = self.rpc.call_instance_get_migration_status(source_node,
7650
                                                           instance)
7651
      msg = result.fail_msg
7652
      ms = result.payload   # MigrationStatus instance
7653
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7654
        logging.error("Instance migration failed, trying to revert"
7655
                      " disk status: %s", msg)
7656
        self.feedback_fn("Migration failed, aborting")
7657
        self._AbortMigration()
7658
        self._RevertDiskStatus()
7659
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7660
                                 (instance.name, msg))
7661

    
7662
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7663
        self.feedback_fn("* memory transfer complete")
7664
        break
7665

    
7666
      if (utils.TimeoutExpired(last_feedback,
7667
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7668
          ms.transferred_ram is not None):
7669
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7670
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7671
        last_feedback = time.time()
7672

    
7673
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7674

    
7675
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7676
                                                           instance,
7677
                                                           True,
7678
                                                           self.live)
7679
    msg = result.fail_msg
7680
    if msg:
7681
      logging.error("Instance migration succeeded, but finalization failed"
7682
                    " on the source node: %s", msg)
7683
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7684
                               msg)
7685

    
7686
    instance.primary_node = target_node
7687

    
7688
    # distribute new instance config to the other nodes
7689
    self.cfg.Update(instance, self.feedback_fn)
7690

    
7691
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7692
                                                           instance,
7693
                                                           migration_info,
7694
                                                           True)
7695
    msg = result.fail_msg
7696
    if msg:
7697
      logging.error("Instance migration succeeded, but finalization failed"
7698
                    " on the target node: %s", msg)
7699
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7700
                               msg)
7701

    
7702
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7703
      self._EnsureSecondary(source_node)
7704
      self._WaitUntilSync()
7705
      self._GoStandalone()
7706
      self._GoReconnect(False)
7707
      self._WaitUntilSync()
7708

    
7709
    self.feedback_fn("* done")
7710

    
7711
  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_up:
      self.feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, dev.iv_name, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % dev.iv_name)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
                                            False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device whose
      CreateOnSecondary() returns True
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


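# A DRBD8 disk is modelled as a small tree: one LV for the data, one
# DRBD_META_SIZE MB LV for the DRBD metadata, and a DRBD8 device on top whose
# logical_id carries the two node names, the allocated port, both minors and
# the shared secret.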
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get(constants.IDISK_VG, vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

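    # AllocateDRBDMinor was called with the (primary, secondary) pair repeated
    # once per disk, so minors[2 * idx] belongs to the primary and
    # minors[2 * idx + 1] to the secondary of disk idx; the LV names below are
    # generated in matching _data/_meta pairs.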
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk[constants.IDISK_ADOPT]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
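  # Worked example of the linear extrapolation below: 200 MiB written in
  # 50 seconds gives 0.25 s/MiB; with 1000 MiB in total, the remaining
  # 800 MiB yield an ETA of 800 * 0.25 = 200 seconds.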
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time


def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # at most MAX_WIPE_CHUNK
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

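      # Example, assuming the usual defaults from constants.py (e.g.
      # MIN_WIPE_CHUNK_PERCENT = 10): a 50 GiB disk would give a 5 GiB chunk,
      # which is then capped to MAX_WIPE_CHUNK.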
      lu.LogInfo("* Wiping disk %d", idx)
8084
      logging.info("Wiping disk %d for instance %s, node %s using"
8085
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8086

    
8087
      offset = 0
8088
      size = device.size
8089
      last_output = 0
8090
      start_time = time.time()
8091

    
8092
      while offset < size:
8093
        wipe_size = min(wipe_chunk_size, size - offset)
8094
        logging.debug("Wiping disk %d, offset %s, chunk %s",
8095
                      idx, offset, wipe_size)
8096
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8097
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
8098
                     (idx, offset, wipe_size))
8099
        now = time.time()
8100
        offset += wipe_size
8101
        if now - last_output >= 60:
8102
          eta = _CalcEta(now - start_time, offset, size)
8103
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
8104
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
8105
          last_output = now
8106
  finally:
8107
    logging.info("Resume sync of instance %s disks", instance.name)
8108

    
8109
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8110

    
8111
    for idx, success in enumerate(result.payload):
8112
      if not success:
8113
        lu.LogWarning("Resume sync of disk %d failed, please have a"
8114
                      " look at the status and troubleshoot the issue", idx)
8115
        logging.warn("resume-sync of instance %s for disks %d failed",
8116
                     instance.name, idx)
8117

    
8118

    
8119
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8120
  """Create all disks for an instance.
8121

8122
  This abstracts away some work from AddInstance.
8123

8124
  @type lu: L{LogicalUnit}
8125
  @param lu: the logical unit on whose behalf we execute
8126
  @type instance: L{objects.Instance}
8127
  @param instance: the instance whose disks we should create
8128
  @type to_skip: list
8129
  @param to_skip: list of indices to skip
8130
  @type target_node: string
8131
  @param target_node: if passed, overrides the target node for creation
8132
  @rtype: boolean
8133
  @return: the success of the creation
8134

8135
  """
8136
  info = _GetInstanceInfoText(instance)
8137
  if target_node is None:
8138
    pnode = instance.primary_node
8139
    all_nodes = instance.all_nodes
8140
  else:
8141
    pnode = target_node
8142
    all_nodes = [pnode]
8143

    
8144
  if instance.disk_template in constants.DTS_FILEBASED:
8145
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8146
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8147

    
8148
    result.Raise("Failed to create directory '%s' on"
8149
                 " node %s" % (file_storage_dir, pnode))
8150

    
8151
  # Note: this needs to be kept in sync with adding of disks in
8152
  # LUInstanceSetParams
8153
  for idx, device in enumerate(instance.disks):
8154
    if to_skip and idx in to_skip:
8155
      continue
8156
    logging.info("Creating volume %s for instance %s",
8157
                 device.iv_name, instance.name)
8158
    #HARDCODE
8159
    for node in all_nodes:
8160
      f_create = node == pnode
8161
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8162

    
8163

    
8164
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload

    return vgs

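  # Example: two 1024 MB disks in volume groups "xenvg" and "other" under
  # DT_DRBD8 produce {"xenvg": 1024 + DRBD_META_SIZE, "other": 1024 +
  # DRBD_META_SIZE}, i.e. the per-VG sum of disk sizes plus the per-disk
  # metadata payload.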
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


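# Unlike _ComputeDiskSizePerVG above, the next helper returns a single total:
# e.g. two 1024 MB disks under DT_DRBD8 require 2 * (1024 + DRBD_META_SIZE)
# MB, file-based and diskless templates have no LVM requirement (None), and
# shared-file/block templates report 0.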
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8:
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in vm_nodes]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

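  # Validate against the fully expanded parameter set: the cluster-level
  # hvparams for this hypervisor are merged with the given overrides before
  # being sent to every node; offline nodes are skipped in the result loop.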
  cluster = lu.cfg.GetClusterInfo()
8303
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8304

    
8305
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8306
  for node in nodenames:
8307
    info = hvinfo[node]
8308
    if info.offline:
8309
      continue
8310
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
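
# Example (illustrative only): if the cluster-wide parameters for a
# hypothetical hypervisor were {"kernel_path": "/boot/vmlinuz-3-xenU"} and
# the instance override were {"kernel_path": "/boot/vmlinuz-custom"}, then
# hvfull would be {"kernel_path": "/boot/vmlinuz-custom"}, and that merged
# dict is what every vm_capable node is asked to validate.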


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUInstanceCreate(LogicalUnit):
8344
  """Create an instance.
8345

8346
  """
8347
  HPATH = "instance-add"
8348
  HTYPE = constants.HTYPE_INSTANCE
8349
  REQ_BGL = False
8350

    
8351
  def CheckArguments(self):
8352
    """Check arguments.
8353

8354
    """
8355
    # do not require name_check to ease forward/backward compatibility
8356
    # for tools
8357
    if self.op.no_install and self.op.start:
8358
      self.LogInfo("No-installation mode selected, disabling startup")
8359
      self.op.start = False
8360
    # validate/normalize the instance name
8361
    self.op.instance_name = \
8362
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8363

    
8364
    if self.op.ip_check and not self.op.name_check:
8365
      # TODO: make the ip check more flexible and not depend on the name check
8366
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8367
                                 " check", errors.ECODE_INVAL)
8368

    
8369
    # check nics' parameter names
8370
    for nic in self.op.nics:
8371
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8372

    
8373
    # check disks. parameter names and consistent adopt/no-adopt strategy
8374
    has_adopt = has_no_adopt = False
8375
    for disk in self.op.disks:
8376
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8377
      if constants.IDISK_ADOPT in disk:
8378
        has_adopt = True
8379
      else:
8380
        has_no_adopt = True
8381
    if has_adopt and has_no_adopt:
8382
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8383
                                 errors.ECODE_INVAL)
8384
    if has_adopt:
8385
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8386
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8387
                                   " '%s' disk template" %
8388
                                   self.op.disk_template,
8389
                                   errors.ECODE_INVAL)
8390
      if self.op.iallocator is not None:
8391
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8392
                                   " iallocator script", errors.ECODE_INVAL)
8393
      if self.op.mode == constants.INSTANCE_IMPORT:
8394
        raise errors.OpPrereqError("Disk adoption not allowed for"
8395
                                   " instance import", errors.ECODE_INVAL)
8396
    else:
8397
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8398
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8399
                                   " but no 'adopt' parameter given" %
8400
                                   self.op.disk_template,
8401
                                   errors.ECODE_INVAL)
8402

    
8403
    self.adopt_disks = has_adopt
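
    # Example (illustrative only): a consistent adoption request names an
    # existing volume for every disk, e.g.
    #   disks = [{constants.IDISK_SIZE: 1024, constants.IDISK_ADOPT: "lv0"},
    #            {constants.IDISK_SIZE: 2048, constants.IDISK_ADOPT: "lv1"}]
    # while mixing adopted and non-adopted disks is rejected above.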
8404

    
8405
    # instance name verification
8406
    if self.op.name_check:
8407
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8408
      self.op.instance_name = self.hostname1.name
8409
      # used in CheckPrereq for ip ping check
8410
      self.check_ip = self.hostname1.ip
8411
    else:
8412
      self.check_ip = None
8413

    
8414
    # file storage checks
8415
    if (self.op.file_driver and
8416
        not self.op.file_driver in constants.FILE_DRIVER):
8417
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8418
                                 self.op.file_driver, errors.ECODE_INVAL)
8419

    
8420
    if self.op.disk_template == constants.DT_FILE:
8421
      opcodes.RequireFileStorage()
8422
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8423
      opcodes.RequireSharedFileStorage()
8424

    
8425
    ### Node/iallocator related checks
8426
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8427

    
8428
    if self.op.pnode is not None:
8429
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8430
        if self.op.snode is None:
8431
          raise errors.OpPrereqError("The networked disk templates need"
8432
                                     " a mirror node", errors.ECODE_INVAL)
8433
      elif self.op.snode:
8434
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8435
                        " template")
8436
        self.op.snode = None
8437

    
8438
    self._cds = _GetClusterDomainSecret()
8439

    
8440
    if self.op.mode == constants.INSTANCE_IMPORT:
8441
      # On import force_variant must be True, because if we forced it at
8442
      # initial install, our only chance when importing it back is that it
8443
      # works again!
8444
      self.op.force_variant = True
8445

    
8446
      if self.op.no_install:
8447
        self.LogInfo("No-installation mode has no effect during import")
8448

    
8449
    elif self.op.mode == constants.INSTANCE_CREATE:
8450
      if self.op.os_type is None:
8451
        raise errors.OpPrereqError("No guest OS specified",
8452
                                   errors.ECODE_INVAL)
8453
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8454
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8455
                                   " installation" % self.op.os_type,
8456
                                   errors.ECODE_STATE)
8457
      if self.op.disk_template is None:
8458
        raise errors.OpPrereqError("No disk template specified",
8459
                                   errors.ECODE_INVAL)
8460

    
8461
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8462
      # Check handshake to ensure both clusters have the same domain secret
8463
      src_handshake = self.op.source_handshake
8464
      if not src_handshake:
8465
        raise errors.OpPrereqError("Missing source handshake",
8466
                                   errors.ECODE_INVAL)
8467

    
8468
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8469
                                                           src_handshake)
8470
      if errmsg:
8471
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8472
                                   errors.ECODE_INVAL)
8473

    
8474
      # Load and check source CA
8475
      self.source_x509_ca_pem = self.op.source_x509_ca
8476
      if not self.source_x509_ca_pem:
8477
        raise errors.OpPrereqError("Missing source X509 CA",
8478
                                   errors.ECODE_INVAL)
8479

    
8480
      try:
8481
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8482
                                                    self._cds)
8483
      except OpenSSL.crypto.Error, err:
8484
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8485
                                   (err, ), errors.ECODE_INVAL)
8486

    
8487
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8488
      if errcode is not None:
8489
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8490
                                   errors.ECODE_INVAL)
8491

    
8492
      self.source_x509_ca = cert
8493

    
8494
      src_instance_name = self.op.source_instance_name
8495
      if not src_instance_name:
8496
        raise errors.OpPrereqError("Missing source instance name",
8497
                                   errors.ECODE_INVAL)
8498

    
8499
      self.source_instance_name = \
8500
          netutils.GetHostname(name=src_instance_name).name
8501

    
8502
    else:
8503
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8504
                                 self.op.mode, errors.ECODE_INVAL)
8505

    
8506
  def ExpandNames(self):
8507
    """ExpandNames for CreateInstance.
8508

8509
    Figure out the right locks for instance creation.
8510

8511
    """
8512
    self.needed_locks = {}
8513

    
8514
    instance_name = self.op.instance_name
8515
    # this is just a preventive check, but someone might still add this
8516
    # instance in the meantime, and creation will fail at lock-add time
8517
    if instance_name in self.cfg.GetInstanceList():
8518
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8519
                                 instance_name, errors.ECODE_EXISTS)
8520

    
8521
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8522

    
8523
    if self.op.iallocator:
8524
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8525
    else:
8526
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8527
      nodelist = [self.op.pnode]
8528
      if self.op.snode is not None:
8529
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8530
        nodelist.append(self.op.snode)
8531
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8532

    
8533
    # in case of import lock the source node too
8534
    if self.op.mode == constants.INSTANCE_IMPORT:
8535
      src_node = self.op.src_node
8536
      src_path = self.op.src_path
8537

    
8538
      if src_path is None:
8539
        self.op.src_path = src_path = self.op.instance_name
8540

    
8541
      if src_node is None:
8542
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8543
        self.op.src_node = None
8544
        if os.path.isabs(src_path):
8545
          raise errors.OpPrereqError("Importing an instance from a path"
8546
                                     " requires a source node option",
8547
                                     errors.ECODE_INVAL)
8548
      else:
8549
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8550
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8551
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8552
        if not os.path.isabs(src_path):
8553
          self.op.src_path = src_path = \
8554
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8555

    
8556
  def _RunAllocator(self):
8557
    """Run the allocator based on input opcode.
8558

8559
    """
8560
    nics = [n.ToDict() for n in self.nics]
8561
    ial = IAllocator(self.cfg, self.rpc,
8562
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8563
                     name=self.op.instance_name,
8564
                     disk_template=self.op.disk_template,
8565
                     tags=self.op.tags,
8566
                     os=self.op.os_type,
8567
                     vcpus=self.be_full[constants.BE_VCPUS],
8568
                     memory=self.be_full[constants.BE_MEMORY],
8569
                     disks=self.disks,
8570
                     nics=nics,
8571
                     hypervisor=self.op.hypervisor,
8572
                     )
8573

    
8574
    ial.Run(self.op.iallocator)
8575

    
8576
    if not ial.success:
8577
      raise errors.OpPrereqError("Can't compute nodes using"
8578
                                 " iallocator '%s': %s" %
8579
                                 (self.op.iallocator, ial.info),
8580
                                 errors.ECODE_NORES)
8581
    if len(ial.result) != ial.required_nodes:
8582
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8583
                                 " of nodes (%s), required %s" %
8584
                                 (self.op.iallocator, len(ial.result),
8585
                                  ial.required_nodes), errors.ECODE_FAULT)
8586
    self.op.pnode = ial.result[0]
8587
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8588
                 self.op.instance_name, self.op.iallocator,
8589
                 utils.CommaJoin(ial.result))
8590
    if ial.required_nodes == 2:
8591
      self.op.snode = ial.result[1]
8592

    
8593
  def BuildHooksEnv(self):
8594
    """Build hooks env.
8595

8596
    This runs on master, primary and secondary nodes of the instance.
8597

8598
    """
8599
    env = {
8600
      "ADD_MODE": self.op.mode,
8601
      }
8602
    if self.op.mode == constants.INSTANCE_IMPORT:
8603
      env["SRC_NODE"] = self.op.src_node
8604
      env["SRC_PATH"] = self.op.src_path
8605
      env["SRC_IMAGES"] = self.src_images
8606

    
8607
    env.update(_BuildInstanceHookEnv(
8608
      name=self.op.instance_name,
8609
      primary_node=self.op.pnode,
8610
      secondary_nodes=self.secondaries,
8611
      status=self.op.start,
8612
      os_type=self.op.os_type,
8613
      memory=self.be_full[constants.BE_MEMORY],
8614
      vcpus=self.be_full[constants.BE_VCPUS],
8615
      nics=_NICListToTuple(self, self.nics),
8616
      disk_template=self.op.disk_template,
8617
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8618
             for d in self.disks],
8619
      bep=self.be_full,
8620
      hvp=self.hv_full,
8621
      hypervisor_name=self.op.hypervisor,
8622
      tags=self.op.tags,
8623
    ))
8624

    
8625
    return env
8626

    
8627
  def BuildHooksNodes(self):
8628
    """Build hooks nodes.
8629

8630
    """
8631
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8632
    return nl, nl
8633

    
8634
  def _ReadExportInfo(self):
8635
    """Reads the export information from disk.
8636

8637
    It will override the opcode source node and path with the actual
8638
    information, if these two were not specified before.
8639

8640
    @return: the export information
8641

8642
    """
8643
    assert self.op.mode == constants.INSTANCE_IMPORT
8644

    
8645
    src_node = self.op.src_node
8646
    src_path = self.op.src_path
8647

    
8648
    if src_node is None:
8649
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8650
      exp_list = self.rpc.call_export_list(locked_nodes)
8651
      found = False
8652
      for node in exp_list:
8653
        if exp_list[node].fail_msg:
8654
          continue
8655
        if src_path in exp_list[node].payload:
8656
          found = True
8657
          self.op.src_node = src_node = node
8658
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8659
                                                       src_path)
8660
          break
8661
      if not found:
8662
        raise errors.OpPrereqError("No export found for relative path %s" %
8663
                                    src_path, errors.ECODE_INVAL)
8664

    
8665
    _CheckNodeOnline(self, src_node)
8666
    result = self.rpc.call_export_info(src_node, src_path)
8667
    result.Raise("No export or invalid export found in dir %s" % src_path)
8668

    
8669
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8670
    if not export_info.has_section(constants.INISECT_EXP):
8671
      raise errors.ProgrammerError("Corrupted export config",
8672
                                   errors.ECODE_ENVIRON)
8673

    
8674
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8675
    if (int(ei_version) != constants.EXPORT_VERSION):
8676
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8677
                                 (ei_version, constants.EXPORT_VERSION),
8678
                                 errors.ECODE_ENVIRON)
8679
    return export_info
8680

    
8681
  def _ReadExportParams(self, einfo):
8682
    """Use export parameters as defaults.
8683

8684
    In case the opcode doesn't specify (as in override) some instance
8685
    parameters, then try to use them from the export information, if
8686
    that declares them.
8687

8688
    """
8689
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8690

    
8691
    if self.op.disk_template is None:
8692
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8693
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8694
                                          "disk_template")
8695
        if self.op.disk_template not in constants.DISK_TEMPLATES:
8696
          raise errors.OpPrereqError("Disk template specified in configuration"
8697
                                     " file is not one of the allowed values:"
8698
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
8699
      else:
8700
        raise errors.OpPrereqError("No disk template specified and the export"
8701
                                   " is missing the disk_template information",
8702
                                   errors.ECODE_INVAL)
8703

    
8704
    if not self.op.disks:
8705
      disks = []
8706
      # TODO: import the disk iv_name too
8707
      for idx in range(constants.MAX_DISKS):
8708
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8709
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8710
          disks.append({constants.IDISK_SIZE: disk_sz})
8711
      self.op.disks = disks
8712
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
8713
        raise errors.OpPrereqError("No disk info specified and the export"
8714
                                   " is missing the disk information",
8715
                                   errors.ECODE_INVAL)
8716

    
8717
    if not self.op.nics:
8718
      nics = []
8719
      for idx in range(constants.MAX_NICS):
8720
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8721
          ndict = {}
8722
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8723
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8724
            ndict[name] = v
8725
          nics.append(ndict)
8726
        else:
8727
          break
8728
      self.op.nics = nics
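
    # Example (illustrative only): an export may carry, in its instance
    # section, options such as
    #   disk0_size = 1024
    #   nic0_mac = aa:00:00:dd:ac:fb
    #   nic0_ip = None
    #   nic0_mode = bridged
    #   nic0_link = xen-br0
    # which the two loops above convert back into self.op.disks and
    # self.op.nics when the opcode did not specify them.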
8729

    
8730
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8731
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8732

    
8733
    if (self.op.hypervisor is None and
8734
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8735
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8736

    
8737
    if einfo.has_section(constants.INISECT_HYP):
8738
      # use the export parameters but do not override the ones
8739
      # specified by the user
8740
      for name, value in einfo.items(constants.INISECT_HYP):
8741
        if name not in self.op.hvparams:
8742
          self.op.hvparams[name] = value
8743

    
8744
    if einfo.has_section(constants.INISECT_BEP):
8745
      # use the parameters, without overriding
8746
      for name, value in einfo.items(constants.INISECT_BEP):
8747
        if name not in self.op.beparams:
8748
          self.op.beparams[name] = value
8749
    else:
8750
      # try to read the parameters old style, from the main section
8751
      for name in constants.BES_PARAMETERS:
8752
        if (name not in self.op.beparams and
8753
            einfo.has_option(constants.INISECT_INS, name)):
8754
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8755

    
8756
    if einfo.has_section(constants.INISECT_OSP):
8757
      # use the parameters, without overriding
8758
      for name, value in einfo.items(constants.INISECT_OSP):
8759
        if name not in self.op.osparams:
8760
          self.op.osparams[name] = value
8761

    
8762
  def _RevertToDefaults(self, cluster):
8763
    """Revert the instance parameters to the default values.
8764

8765
    """
8766
    # hvparams
8767
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8768
    for name in self.op.hvparams.keys():
8769
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8770
        del self.op.hvparams[name]
8771
    # beparams
8772
    be_defs = cluster.SimpleFillBE({})
8773
    for name in self.op.beparams.keys():
8774
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8775
        del self.op.beparams[name]
8776
    # nic params
8777
    nic_defs = cluster.SimpleFillNIC({})
8778
    for nic in self.op.nics:
8779
      for name in constants.NICS_PARAMETERS:
8780
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8781
          del nic[name]
8782
    # osparams
8783
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8784
    for name in self.op.osparams.keys():
8785
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8786
        del self.op.osparams[name]
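
    # Example (illustrative only): if the cluster default for
    # constants.BE_VCPUS is 1 and the opcode also specified 1, the entry is
    # dropped here so the instance keeps following the cluster default
    # instead of pinning the value it had at creation time.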
8787

    
8788
  def _CalculateFileStorageDir(self):
8789
    """Calculate final instance file storage dir.
8790

8791
    """
8792
    # file storage dir calculation/check
8793
    self.instance_file_storage_dir = None
8794
    if self.op.disk_template in constants.DTS_FILEBASED:
8795
      # build the full file storage dir path
8796
      joinargs = []
8797

    
8798
      if self.op.disk_template == constants.DT_SHARED_FILE:
8799
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8800
      else:
8801
        get_fsd_fn = self.cfg.GetFileStorageDir
8802

    
8803
      cfg_storagedir = get_fsd_fn()
8804
      if not cfg_storagedir:
8805
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8806
      joinargs.append(cfg_storagedir)
8807

    
8808
      if self.op.file_storage_dir is not None:
8809
        joinargs.append(self.op.file_storage_dir)
8810

    
8811
      joinargs.append(self.op.instance_name)
8812

    
8813
      # pylint: disable=W0142
8814
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
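
      # Example (illustrative only): with a cluster file storage dir of
      # "/srv/ganeti/file-storage", an opcode file_storage_dir of "web" and
      # an instance named "inst1.example.com", the resulting path is
      # "/srv/ganeti/file-storage/web/inst1.example.com".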
8815

    
8816
  def CheckPrereq(self):
8817
    """Check prerequisites.
8818

8819
    """
8820
    self._CalculateFileStorageDir()
8821

    
8822
    if self.op.mode == constants.INSTANCE_IMPORT:
8823
      export_info = self._ReadExportInfo()
8824
      self._ReadExportParams(export_info)
8825

    
8826
    if (not self.cfg.GetVGName() and
8827
        self.op.disk_template not in constants.DTS_NOT_LVM):
8828
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8829
                                 " instances", errors.ECODE_STATE)
8830

    
8831
    if (self.op.hypervisor is None or
8832
        self.op.hypervisor == constants.VALUE_AUTO):
8833
      self.op.hypervisor = self.cfg.GetHypervisorType()
8834

    
8835
    cluster = self.cfg.GetClusterInfo()
8836
    enabled_hvs = cluster.enabled_hypervisors
8837
    if self.op.hypervisor not in enabled_hvs:
8838
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8839
                                 " cluster (%s)" % (self.op.hypervisor,
8840
                                  ",".join(enabled_hvs)),
8841
                                 errors.ECODE_STATE)
8842

    
8843
    # Check tag validity
8844
    for tag in self.op.tags:
8845
      objects.TaggableObject.ValidateTag(tag)
8846

    
8847
    # check hypervisor parameter syntax (locally)
8848
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8849
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8850
                                      self.op.hvparams)
8851
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8852
    hv_type.CheckParameterSyntax(filled_hvp)
8853
    self.hv_full = filled_hvp
8854
    # check that we don't specify global parameters on an instance
8855
    _CheckGlobalHvParams(self.op.hvparams)
8856

    
8857
    # fill and remember the beparams dict
8858
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
8859
    for param, value in self.op.beparams.iteritems():
8860
      if value == constants.VALUE_AUTO:
8861
        self.op.beparams[param] = default_beparams[param]
8862
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8863
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8864

    
8865
    # build os parameters
8866
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8867

    
8868
    # now that hvp/bep are in final format, let's reset to defaults,
8869
    # if told to do so
8870
    if self.op.identify_defaults:
8871
      self._RevertToDefaults(cluster)
8872

    
8873
    # NIC buildup
8874
    self.nics = []
8875
    for idx, nic in enumerate(self.op.nics):
8876
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8877
      nic_mode = nic_mode_req
8878
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8879
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8880

    
8881
      # in routed mode, for the first nic, the default ip is 'auto'
8882
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8883
        default_ip_mode = constants.VALUE_AUTO
8884
      else:
8885
        default_ip_mode = constants.VALUE_NONE
8886

    
8887
      # ip validity checks
8888
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8889
      if ip is None or ip.lower() == constants.VALUE_NONE:
8890
        nic_ip = None
8891
      elif ip.lower() == constants.VALUE_AUTO:
8892
        if not self.op.name_check:
8893
          raise errors.OpPrereqError("IP address set to auto but name checks"
8894
                                     " have been skipped",
8895
                                     errors.ECODE_INVAL)
8896
        nic_ip = self.hostname1.ip
8897
      else:
8898
        if not netutils.IPAddress.IsValid(ip):
8899
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8900
                                     errors.ECODE_INVAL)
8901
        nic_ip = ip
8902

    
8903
      # TODO: check the ip address for uniqueness
8904
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8905
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8906
                                   errors.ECODE_INVAL)
8907

    
8908
      # MAC address verification
8909
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8910
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8911
        mac = utils.NormalizeAndValidateMac(mac)
8912

    
8913
        try:
8914
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8915
        except errors.ReservationError:
8916
          raise errors.OpPrereqError("MAC address %s already in use"
8917
                                     " in cluster" % mac,
8918
                                     errors.ECODE_NOTUNIQUE)
8919

    
8920
      #  Build nic parameters
8921
      link = nic.get(constants.INIC_LINK, None)
8922
      if link == constants.VALUE_AUTO:
8923
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
8924
      nicparams = {}
8925
      if nic_mode_req:
8926
        nicparams[constants.NIC_MODE] = nic_mode
8927
      if link:
8928
        nicparams[constants.NIC_LINK] = link
8929

    
8930
      check_params = cluster.SimpleFillNIC(nicparams)
8931
      objects.NIC.CheckParameterSyntax(check_params)
8932
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
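
      # Example (illustrative only): an opcode NIC of
      #   {constants.INIC_MODE: constants.NIC_MODE_BRIDGED,
      #    constants.INIC_LINK: "xen-br0"}
      # becomes objects.NIC(mac=constants.VALUE_AUTO, ip=None,
      # nicparams={NIC_MODE: "bridged", NIC_LINK: "xen-br0"}), with the MAC
      # replaced by a generated address further down in this method.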
8933

    
8934
    # disk checks/pre-build
8935
    default_vg = self.cfg.GetVGName()
8936
    self.disks = []
8937
    for disk in self.op.disks:
8938
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8939
      if mode not in constants.DISK_ACCESS_SET:
8940
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8941
                                   mode, errors.ECODE_INVAL)
8942
      size = disk.get(constants.IDISK_SIZE, None)
8943
      if size is None:
8944
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8945
      try:
8946
        size = int(size)
8947
      except (TypeError, ValueError):
8948
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8949
                                   errors.ECODE_INVAL)
8950

    
8951
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8952
      new_disk = {
8953
        constants.IDISK_SIZE: size,
8954
        constants.IDISK_MODE: mode,
8955
        constants.IDISK_VG: data_vg,
8956
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8957
        }
8958
      if constants.IDISK_ADOPT in disk:
8959
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8960
      self.disks.append(new_disk)
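
      # Example (illustrative only): an opcode disk of
      # {constants.IDISK_SIZE: "2048"} is normalized here into a dict with
      # IDISK_SIZE=2048, the default access mode (constants.DISK_RDWR) and
      # the cluster default volume group for both IDISK_VG and IDISK_METAVG,
      # unless the opcode overrides them.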
8961

    
8962
    if self.op.mode == constants.INSTANCE_IMPORT:
8963
      disk_images = []
8964
      for idx in range(len(self.disks)):
8965
        option = "disk%d_dump" % idx
8966
        if export_info.has_option(constants.INISECT_INS, option):
8967
          # FIXME: are the old os-es, disk sizes, etc. useful?
8968
          export_name = export_info.get(constants.INISECT_INS, option)
8969
          image = utils.PathJoin(self.op.src_path, export_name)
8970
          disk_images.append(image)
8971
        else:
8972
          disk_images.append(False)
8973

    
8974
      self.src_images = disk_images
8975

    
8976
      old_name = export_info.get(constants.INISECT_INS, "name")
8977
      if self.op.instance_name == old_name:
8978
        for idx, nic in enumerate(self.nics):
8979
          if nic.mac == constants.VALUE_AUTO:
8980
            nic_mac_ini = "nic%d_mac" % idx
8981
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8982

    
8983
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8984

    
8985
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8986
    if self.op.ip_check:
8987
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8988
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8989
                                   (self.check_ip, self.op.instance_name),
8990
                                   errors.ECODE_NOTUNIQUE)
8991

    
8992
    #### mac address generation
8993
    # By generating here the mac address both the allocator and the hooks get
8994
    # the real final mac address rather than the 'auto' or 'generate' value.
8995
    # There is a race condition between the generation and the instance object
8996
    # creation, which means that we know the mac is valid now, but we're not
8997
    # sure it will be when we actually add the instance. If things go bad
8998
    # adding the instance will abort because of a duplicate mac, and the
8999
    # creation job will fail.
9000
    for nic in self.nics:
9001
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9002
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9003

    
9004
    #### allocator run
9005

    
9006
    if self.op.iallocator is not None:
9007
      self._RunAllocator()
9008

    
9009
    #### node related checks
9010

    
9011
    # check primary node
9012
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9013
    assert self.pnode is not None, \
9014
      "Cannot retrieve locked node %s" % self.op.pnode
9015
    if pnode.offline:
9016
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9017
                                 pnode.name, errors.ECODE_STATE)
9018
    if pnode.drained:
9019
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9020
                                 pnode.name, errors.ECODE_STATE)
9021
    if not pnode.vm_capable:
9022
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9023
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
9024

    
9025
    self.secondaries = []
9026

    
9027
    # mirror node verification
9028
    if self.op.disk_template in constants.DTS_INT_MIRROR:
9029
      if self.op.snode == pnode.name:
9030
        raise errors.OpPrereqError("The secondary node cannot be the"
9031
                                   " primary node", errors.ECODE_INVAL)
9032
      _CheckNodeOnline(self, self.op.snode)
9033
      _CheckNodeNotDrained(self, self.op.snode)
9034
      _CheckNodeVmCapable(self, self.op.snode)
9035
      self.secondaries.append(self.op.snode)
9036

    
9037
    nodenames = [pnode.name] + self.secondaries
9038

    
9039
    if not self.adopt_disks:
9040
      # Check lv size requirements, if not adopting
9041
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9042
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9043

    
9044
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9045
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9046
                                disk[constants.IDISK_ADOPT])
9047
                     for disk in self.disks])
9048
      if len(all_lvs) != len(self.disks):
9049
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
9050
                                   errors.ECODE_INVAL)
9051
      for lv_name in all_lvs:
9052
        try:
9053
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9054
          # to ReserveLV uses the same syntax
9055
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9056
        except errors.ReservationError:
9057
          raise errors.OpPrereqError("LV named %s used by another instance" %
9058
                                     lv_name, errors.ECODE_NOTUNIQUE)
9059

    
9060
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9061
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9062

    
9063
      node_lvs = self.rpc.call_lv_list([pnode.name],
9064
                                       vg_names.payload.keys())[pnode.name]
9065
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9066
      node_lvs = node_lvs.payload
9067

    
9068
      delta = all_lvs.difference(node_lvs.keys())
9069
      if delta:
9070
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
9071
                                   utils.CommaJoin(delta),
9072
                                   errors.ECODE_INVAL)
9073
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9074
      if online_lvs:
9075
        raise errors.OpPrereqError("Online logical volumes found, cannot"
9076
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
9077
                                   errors.ECODE_STATE)
9078
      # update the size of disk based on what is found
9079
      for dsk in self.disks:
9080
        dsk[constants.IDISK_SIZE] = \
9081
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9082
                                        dsk[constants.IDISK_ADOPT])][0]))
9083

    
9084
    elif self.op.disk_template == constants.DT_BLOCK:
9085
      # Normalize and de-duplicate device paths
9086
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9087
                       for disk in self.disks])
9088
      if len(all_disks) != len(self.disks):
9089
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
9090
                                   errors.ECODE_INVAL)
9091
      baddisks = [d for d in all_disks
9092
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9093
      if baddisks:
9094
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9095
                                   " cannot be adopted" %
9096
                                   (", ".join(baddisks),
9097
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
9098
                                   errors.ECODE_INVAL)
9099

    
9100
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
9101
                                            list(all_disks))[pnode.name]
9102
      node_disks.Raise("Cannot get block device information from node %s" %
9103
                       pnode.name)
9104
      node_disks = node_disks.payload
9105
      delta = all_disks.difference(node_disks.keys())
9106
      if delta:
9107
        raise errors.OpPrereqError("Missing block device(s): %s" %
9108
                                   utils.CommaJoin(delta),
9109
                                   errors.ECODE_INVAL)
9110
      for dsk in self.disks:
9111
        dsk[constants.IDISK_SIZE] = \
9112
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9113

    
9114
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9115

    
9116
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9117
    # check OS parameters (remotely)
9118
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9119

    
9120
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9121

    
9122
    # memory check on primary node
9123
    if self.op.start:
9124
      _CheckNodeFreeMemory(self, self.pnode.name,
9125
                           "creating instance %s" % self.op.instance_name,
9126
                           self.be_full[constants.BE_MEMORY],
9127
                           self.op.hypervisor)
9128

    
9129
    self.dry_run_result = list(nodenames)
9130

    
9131
  def Exec(self, feedback_fn):
9132
    """Create and add the instance to the cluster.
9133

9134
    """
9135
    instance = self.op.instance_name
9136
    pnode_name = self.pnode.name
9137

    
9138
    ht_kind = self.op.hypervisor
9139
    if ht_kind in constants.HTS_REQ_PORT:
9140
      network_port = self.cfg.AllocatePort()
9141
    else:
9142
      network_port = None
9143

    
9144
    disks = _GenerateDiskTemplate(self,
9145
                                  self.op.disk_template,
9146
                                  instance, pnode_name,
9147
                                  self.secondaries,
9148
                                  self.disks,
9149
                                  self.instance_file_storage_dir,
9150
                                  self.op.file_driver,
9151
                                  0,
9152
                                  feedback_fn)
9153

    
9154
    iobj = objects.Instance(name=instance, os=self.op.os_type,
9155
                            primary_node=pnode_name,
9156
                            nics=self.nics, disks=disks,
9157
                            disk_template=self.op.disk_template,
9158
                            admin_up=False,
9159
                            network_port=network_port,
9160
                            beparams=self.op.beparams,
9161
                            hvparams=self.op.hvparams,
9162
                            hypervisor=self.op.hypervisor,
9163
                            osparams=self.op.osparams,
9164
                            )
9165

    
9166
    if self.op.tags:
9167
      for tag in self.op.tags:
9168
        iobj.AddTag(tag)
9169

    
9170
    if self.adopt_disks:
9171
      if self.op.disk_template == constants.DT_PLAIN:
9172
        # rename LVs to the newly-generated names; we need to construct
9173
        # 'fake' LV disks with the old data, plus the new unique_id
9174
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9175
        rename_to = []
9176
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9177
          rename_to.append(t_dsk.logical_id)
9178
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9179
          self.cfg.SetDiskID(t_dsk, pnode_name)
9180
        result = self.rpc.call_blockdev_rename(pnode_name,
9181
                                               zip(tmp_disks, rename_to))
9182
        result.Raise("Failed to rename adopted LVs")
9183
    else:
9184
      feedback_fn("* creating instance disks...")
9185
      try:
9186
        _CreateDisks(self, iobj)
9187
      except errors.OpExecError:
9188
        self.LogWarning("Device creation failed, reverting...")
9189
        try:
9190
          _RemoveDisks(self, iobj)
9191
        finally:
9192
          self.cfg.ReleaseDRBDMinors(instance)
9193
          raise
9194

    
9195
    feedback_fn("adding instance %s to cluster config" % instance)
9196

    
9197
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9198

    
9199
    # Declare that we don't want to remove the instance lock anymore, as we've
9200
    # added the instance to the config
9201
    del self.remove_locks[locking.LEVEL_INSTANCE]
9202

    
9203
    if self.op.mode == constants.INSTANCE_IMPORT:
9204
      # Release unused nodes
9205
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9206
    else:
9207
      # Release all nodes
9208
      _ReleaseLocks(self, locking.LEVEL_NODE)
9209

    
9210
    disk_abort = False
9211
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9212
      feedback_fn("* wiping instance disks...")
9213
      try:
9214
        _WipeDisks(self, iobj)
9215
      except errors.OpExecError, err:
9216
        logging.exception("Wiping disks failed")
9217
        self.LogWarning("Wiping instance disks failed (%s)", err)
9218
        disk_abort = True
9219

    
9220
    if disk_abort:
9221
      # Something is already wrong with the disks, don't do anything else
9222
      pass
9223
    elif self.op.wait_for_sync:
9224
      disk_abort = not _WaitForSync(self, iobj)
9225
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9226
      # make sure the disks are not degraded (still sync-ing is ok)
9227
      feedback_fn("* checking mirrors status")
9228
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9229
    else:
9230
      disk_abort = False
9231

    
9232
    if disk_abort:
9233
      _RemoveDisks(self, iobj)
9234
      self.cfg.RemoveInstance(iobj.name)
9235
      # Make sure the instance lock gets removed
9236
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9237
      raise errors.OpExecError("There are some degraded disks for"
9238
                               " this instance")
9239

    
9240
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9241
      if self.op.mode == constants.INSTANCE_CREATE:
9242
        if not self.op.no_install:
9243
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9244
                        not self.op.wait_for_sync)
9245
          if pause_sync:
9246
            feedback_fn("* pausing disk sync to install instance OS")
9247
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9248
                                                              iobj.disks, True)
9249
            for idx, success in enumerate(result.payload):
9250
              if not success:
9251
                logging.warn("pause-sync of instance %s for disk %d failed",
9252
                             instance, idx)
9253

    
9254
          feedback_fn("* running the instance OS create scripts...")
9255
          # FIXME: pass debug option from opcode to backend
9256
          os_add_result = \
9257
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9258
                                          self.op.debug_level)
9259
          if pause_sync:
9260
            feedback_fn("* resuming disk sync")
9261
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9262
                                                              iobj.disks, False)
9263
            for idx, success in enumerate(result.payload):
9264
              if not success:
9265
                logging.warn("resume-sync of instance %s for disk %d failed",
9266
                             instance, idx)
9267

    
9268
          os_add_result.Raise("Could not add os for instance %s"
9269
                              " on node %s" % (instance, pnode_name))
9270

    
9271
      elif self.op.mode == constants.INSTANCE_IMPORT:
9272
        feedback_fn("* running the instance OS import scripts...")
9273

    
9274
        transfers = []
9275

    
9276
        for idx, image in enumerate(self.src_images):
9277
          if not image:
9278
            continue
9279

    
9280
          # FIXME: pass debug option from opcode to backend
9281
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9282
                                             constants.IEIO_FILE, (image, ),
9283
                                             constants.IEIO_SCRIPT,
9284
                                             (iobj.disks[idx], idx),
9285
                                             None)
9286
          transfers.append(dt)
9287

    
9288
        import_result = \
9289
          masterd.instance.TransferInstanceData(self, feedback_fn,
9290
                                                self.op.src_node, pnode_name,
9291
                                                self.pnode.secondary_ip,
9292
                                                iobj, transfers)
9293
        if not compat.all(import_result):
9294
          self.LogWarning("Some disks for instance %s on node %s were not"
9295
                          " imported successfully" % (instance, pnode_name))
9296

    
9297
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9298
        feedback_fn("* preparing remote import...")
9299
        # The source cluster will stop the instance before attempting to make a
9300
        # connection. In some cases stopping an instance can take a long time,
9301
        # hence the shutdown timeout is added to the connection timeout.
9302
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9303
                           self.op.source_shutdown_timeout)
9304
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9305

    
9306
        assert iobj.primary_node == self.pnode.name
9307
        disk_results = \
9308
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9309
                                        self.source_x509_ca,
9310
                                        self._cds, timeouts)
9311
        if not compat.all(disk_results):
9312
          # TODO: Should the instance still be started, even if some disks
9313
          # failed to import (valid for local imports, too)?
9314
          self.LogWarning("Some disks for instance %s on node %s were not"
9315
                          " imported successfully" % (instance, pnode_name))
9316

    
9317
        # Run rename script on newly imported instance
9318
        assert iobj.name == instance
9319
        feedback_fn("Running rename script for %s" % instance)
9320
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9321
                                                   self.source_instance_name,
9322
                                                   self.op.debug_level)
9323
        if result.fail_msg:
9324
          self.LogWarning("Failed to run rename script for %s on node"
9325
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9326

    
9327
      else:
9328
        # also checked in the prereq part
9329
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9330
                                     % self.op.mode)
9331

    
9332
    if self.op.start:
9333
      iobj.admin_up = True
9334
      self.cfg.Update(iobj, feedback_fn)
9335
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9336
      feedback_fn("* starting instance...")
9337
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9338
                                            False)
9339
      result.Raise("Could not start instance")
9340

    
9341
    return list(iobj.all_nodes)
9342

    
9343

    
9344
class LUInstanceConsole(NoHooksLU):
9345
  """Connect to an instance's console.
9346

9347
  This is somewhat special in that it returns the command line that
9348
  you need to run on the master node in order to connect to the
9349
  console.
9350

9351
  """
9352
  REQ_BGL = False
9353

    
9354
  def ExpandNames(self):
9355
    self._ExpandAndLockInstance()
9356

    
9357
  def CheckPrereq(self):
9358
    """Check prerequisites.
9359

9360
    This checks that the instance is in the cluster.
9361

9362
    """
9363
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9364
    assert self.instance is not None, \
9365
      "Cannot retrieve locked instance %s" % self.op.instance_name
9366
    _CheckNodeOnline(self, self.instance.primary_node)
9367

    
9368
  def Exec(self, feedback_fn):
9369
    """Connect to the console of an instance
9370

9371
    """
9372
    instance = self.instance
9373
    node = instance.primary_node
9374

    
9375
    node_insts = self.rpc.call_instance_list([node],
9376
                                             [instance.hypervisor])[node]
9377
    node_insts.Raise("Can't get node information from %s" % node)
9378

    
9379
    if instance.name not in node_insts.payload:
9380
      if instance.admin_up:
9381
        state = constants.INSTST_ERRORDOWN
9382
      else:
9383
        state = constants.INSTST_ADMINDOWN
9384
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9385
                               (instance.name, state))
9386

    
9387
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9388

    
9389
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9390

    
9391

    
9392
def _GetInstanceConsole(cluster, instance):
9393
  """Returns console information for an instance.
9394

9395
  @type cluster: L{objects.Cluster}
9396
  @type instance: L{objects.Instance}
9397
  @rtype: dict
9398

9399
  """
9400
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9401
  # beparams and hvparams are passed separately, to avoid editing the
9402
  # instance and then saving the defaults in the instance itself.
9403
  hvparams = cluster.FillHV(instance)
9404
  beparams = cluster.FillBE(instance)
9405
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9406

    
9407
  assert console.instance == instance.name
9408
  assert console.Validate()
9409

    
9410
  return console.ToDict()
9411

    
9412

    
9413
class LUInstanceReplaceDisks(LogicalUnit):
9414
  """Replace the disks of an instance.
9415

9416
  """
9417
  HPATH = "mirrors-replace"
9418
  HTYPE = constants.HTYPE_INSTANCE
9419
  REQ_BGL = False
9420

    
9421
  def CheckArguments(self):
9422
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9423
                                  self.op.iallocator)
9424

    
9425
  def ExpandNames(self):
9426
    self._ExpandAndLockInstance()
9427

    
9428
    assert locking.LEVEL_NODE not in self.needed_locks
9429
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9430

    
9431
    assert self.op.iallocator is None or self.op.remote_node is None, \
9432
      "Conflicting options"
9433

    
9434
    if self.op.remote_node is not None:
9435
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9436

    
9437
      # Warning: do not remove the locking of the new secondary here
9438
      # unless DRBD8.AddChildren is changed to work in parallel;
9439
      # currently it doesn't since parallel invocations of
9440
      # FindUnusedMinor will conflict
9441
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9442
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9443
    else:
9444
      self.needed_locks[locking.LEVEL_NODE] = []
9445
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9446

    
9447
      if self.op.iallocator is not None:
9448
        # iallocator will select a new node in the same group
9449
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9450

    
9451
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9452
                                   self.op.iallocator, self.op.remote_node,
9453
                                   self.op.disks, False, self.op.early_release)
9454

    
9455
    self.tasklets = [self.replacer]
9456

    
9457
  def DeclareLocks(self, level):
9458
    if level == locking.LEVEL_NODEGROUP:
9459
      assert self.op.remote_node is None
9460
      assert self.op.iallocator is not None
9461
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9462

    
9463
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9464
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9465
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9466

    
9467
    elif level == locking.LEVEL_NODE:
9468
      if self.op.iallocator is not None:
9469
        assert self.op.remote_node is None
9470
        assert not self.needed_locks[locking.LEVEL_NODE]
9471

    
9472
        # Lock member nodes of all locked groups
9473
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9474
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9475
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9476
      else:
9477
        self._LockInstancesNodes()
9478

    
9479
  def BuildHooksEnv(self):
9480
    """Build hooks env.
9481

9482
    This runs on the master, the primary and all the secondaries.
9483

9484
    """
9485
    instance = self.replacer.instance
9486
    env = {
9487
      "MODE": self.op.mode,
9488
      "NEW_SECONDARY": self.op.remote_node,
9489
      "OLD_SECONDARY": instance.secondary_nodes[0],
9490
      }
9491
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9492
    return env
9493

    
9494
  def BuildHooksNodes(self):
9495
    """Build hooks nodes.
9496

9497
    """
9498
    instance = self.replacer.instance
9499
    nl = [
9500
      self.cfg.GetMasterNode(),
9501
      instance.primary_node,
9502
      ]
9503
    if self.op.remote_node is not None:
9504
      nl.append(self.op.remote_node)
9505
    return nl, nl
9506

    
9507
  def CheckPrereq(self):
9508
    """Check prerequisites.
9509

9510
    """
9511
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9512
            self.op.iallocator is None)
9513

    
9514
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9515
    if owned_groups:
9516
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9517

    
9518
    return LogicalUnit.CheckPrereq(self)
9519

    
9520

    
9521
class TLReplaceDisks(Tasklet):
9522
  """Replaces disks for an instance.
9523

9524
  Note: Locking is not within the scope of this class.
9525

9526
  """
9527
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9528
               disks, delay_iallocator, early_release):
9529
    """Initializes this class.
9530

9531
    """
9532
    Tasklet.__init__(self, lu)
9533

    
9534
    # Parameters
9535
    self.instance_name = instance_name
9536
    self.mode = mode
9537
    self.iallocator_name = iallocator_name
9538
    self.remote_node = remote_node
9539
    self.disks = disks
9540
    self.delay_iallocator = delay_iallocator
9541
    self.early_release = early_release
9542

    
9543
    # Runtime data
9544
    self.instance = None
9545
    self.new_node = None
9546
    self.target_node = None
9547
    self.other_node = None
9548
    self.remote_node_info = None
9549
    self.node_secondary_ip = None
9550

    
9551
  @staticmethod
9552
  def CheckArguments(mode, remote_node, iallocator):
9553
    """Helper function for users of this class.
9554

9555
    """
9556
    # check for valid parameter combination
9557
    if mode == constants.REPLACE_DISK_CHG:
9558
      if remote_node is None and iallocator is None:
9559
        raise errors.OpPrereqError("When changing the secondary either an"
9560
                                   " iallocator script must be used or the"
9561
                                   " new node given", errors.ECODE_INVAL)
9562

    
9563
      if remote_node is not None and iallocator is not None:
9564
        raise errors.OpPrereqError("Give either the iallocator or the new"
9565
                                   " secondary, not both", errors.ECODE_INVAL)
9566

    
9567
    elif remote_node is not None or iallocator is not None:
9568
      # Not replacing the secondary
9569
      raise errors.OpPrereqError("The iallocator and new node options can"
9570
                                 " only be used when changing the"
9571
                                 " secondary node", errors.ECODE_INVAL)
9572

    
9573
  @staticmethod
9574
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9575
    """Compute a new secondary node using an IAllocator.
9576

9577
    """
9578
    ial = IAllocator(lu.cfg, lu.rpc,
9579
                     mode=constants.IALLOCATOR_MODE_RELOC,
9580
                     name=instance_name,
9581
                     relocate_from=list(relocate_from))
9582

    
9583
    ial.Run(iallocator_name)
9584

    
9585
    if not ial.success:
9586
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9587
                                 " %s" % (iallocator_name, ial.info),
9588
                                 errors.ECODE_NORES)
9589

    
9590
    if len(ial.result) != ial.required_nodes:
9591
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9592
                                 " of nodes (%s), required %s" %
9593
                                 (iallocator_name,
9594
                                  len(ial.result), ial.required_nodes),
9595
                                 errors.ECODE_FAULT)
9596

    
9597
    remote_node_name = ial.result[0]
9598

    
9599
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9600
               instance_name, remote_node_name)
9601

    
9602
    return remote_node_name
9603

    
9604
  def _FindFaultyDisks(self, node_name):
9605
    """Wrapper for L{_FindFaultyInstanceDisks}.
9606

9607
    """
9608
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9609
                                    node_name, True)
9610

    
9611
  def _CheckDisksActivated(self, instance):
9612
    """Checks if the instance disks are activated.
9613

9614
    @param instance: The instance to check disks
9615
    @return: True if they are activated, False otherwise
9616

9617
    """
9618
    nodes = instance.all_nodes
9619

    
9620
    for idx, dev in enumerate(instance.disks):
9621
      for node in nodes:
9622
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9623
        self.cfg.SetDiskID(dev, node)
9624

    
9625
        result = self.rpc.call_blockdev_find(node, dev)
9626

    
9627
        if result.offline:
9628
          continue
9629
        elif result.fail_msg or not result.payload:
9630
          return False
9631

    
9632
    return True
9633

    
9634
  def CheckPrereq(self):
9635
    """Check prerequisites.
9636

9637
    This checks that the instance is in the cluster.
9638

9639
    """
9640
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9641
    assert instance is not None, \
9642
      "Cannot retrieve locked instance %s" % self.instance_name
9643

    
9644
    if instance.disk_template != constants.DT_DRBD8:
9645
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9646
                                 " instances", errors.ECODE_INVAL)
9647

    
9648
    if len(instance.secondary_nodes) != 1:
9649
      raise errors.OpPrereqError("The instance has a strange layout,"
9650
                                 " expected one secondary but found %d" %
9651
                                 len(instance.secondary_nodes),
9652
                                 errors.ECODE_FAULT)
9653

    
9654
    if not self.delay_iallocator:
9655
      self._CheckPrereq2()
9656

    
9657
  def _CheckPrereq2(self):
9658
    """Check prerequisites, second part.
9659

9660
    This function should always be part of CheckPrereq. It was separated and is
9661
    now called from Exec because during node evacuation iallocator was only
9662
    called with an unmodified cluster model, not taking planned changes into
9663
    account.
9664

9665
    """
9666
    instance = self.instance
9667
    secondary_node = instance.secondary_nodes[0]
9668

    
9669
    if self.iallocator_name is None:
9670
      remote_node = self.remote_node
9671
    else:
9672
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9673
                                       instance.name, instance.secondary_nodes)
9674

    
9675
    if remote_node is None:
9676
      self.remote_node_info = None
9677
    else:
9678
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9679
             "Remote node '%s' is not locked" % remote_node
9680

    
9681
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9682
      assert self.remote_node_info is not None, \
9683
        "Cannot retrieve locked node %s" % remote_node
9684

    
9685
    if remote_node == self.instance.primary_node:
9686
      raise errors.OpPrereqError("The specified node is the primary node of"
9687
                                 " the instance", errors.ECODE_INVAL)
9688

    
9689
    if remote_node == secondary_node:
9690
      raise errors.OpPrereqError("The specified node is already the"
9691
                                 " secondary node of the instance",
9692
                                 errors.ECODE_INVAL)
9693

    
9694
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9695
                                    constants.REPLACE_DISK_CHG):
9696
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9697
                                 errors.ECODE_INVAL)
9698

    
9699
    if self.mode == constants.REPLACE_DISK_AUTO:
9700
      if not self._CheckDisksActivated(instance):
9701
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9702
                                   " first" % self.instance_name,
9703
                                   errors.ECODE_STATE)
9704
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9705
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9706

    
9707
      if faulty_primary and faulty_secondary:
9708
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9709
                                   " one node and can not be repaired"
9710
                                   " automatically" % self.instance_name,
9711
                                   errors.ECODE_STATE)
9712

    
9713
      if faulty_primary:
9714
        self.disks = faulty_primary
9715
        self.target_node = instance.primary_node
9716
        self.other_node = secondary_node
9717
        check_nodes = [self.target_node, self.other_node]
9718
      elif faulty_secondary:
9719
        self.disks = faulty_secondary
9720
        self.target_node = secondary_node
9721
        self.other_node = instance.primary_node
9722
        check_nodes = [self.target_node, self.other_node]
9723
      else:
9724
        self.disks = []
9725
        check_nodes = []
9726

    
9727
    else:
9728
      # Non-automatic modes
9729
      if self.mode == constants.REPLACE_DISK_PRI:
9730
        self.target_node = instance.primary_node
9731
        self.other_node = secondary_node
9732
        check_nodes = [self.target_node, self.other_node]
9733

    
9734
      elif self.mode == constants.REPLACE_DISK_SEC:
9735
        self.target_node = secondary_node
9736
        self.other_node = instance.primary_node
9737
        check_nodes = [self.target_node, self.other_node]
9738

    
9739
      elif self.mode == constants.REPLACE_DISK_CHG:
9740
        self.new_node = remote_node
9741
        self.other_node = instance.primary_node
9742
        self.target_node = secondary_node
9743
        check_nodes = [self.new_node, self.other_node]
9744

    
9745
        _CheckNodeNotDrained(self.lu, remote_node)
9746
        _CheckNodeVmCapable(self.lu, remote_node)
9747

    
9748
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9749
        assert old_node_info is not None
9750
        if old_node_info.offline and not self.early_release:
9751
          # doesn't make sense to delay the release
9752
          self.early_release = True
9753
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9754
                          " early-release mode", secondary_node)
9755

    
9756
      else:
9757
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9758
                                     self.mode)
9759

    
9760
      # If not specified all disks should be replaced
9761
      if not self.disks:
9762
        self.disks = range(len(self.instance.disks))
9763

    
9764
    for node in check_nodes:
9765
      _CheckNodeOnline(self.lu, node)
9766

    
9767
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9768
                                                          self.other_node,
9769
                                                          self.target_node]
9770
                              if node_name is not None)
9771

    
9772
    # Release unneeded node locks
9773
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9774

    
9775
    # Release any owned node group
9776
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9777
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9778

    
9779
    # Check whether disks are valid
9780
    for disk_idx in self.disks:
9781
      instance.FindDisk(disk_idx)
9782

    
9783
    # Get secondary node IP addresses
9784
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9785
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9786

    
9787
  def Exec(self, feedback_fn):
9788
    """Execute disk replacement.
9789

9790
    This dispatches the disk replacement to the appropriate handler.
9791

9792
    """
9793
    if self.delay_iallocator:
9794
      self._CheckPrereq2()
9795

    
9796
    if __debug__:
9797
      # Verify owned locks before starting operation
9798
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9799
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9800
          ("Incorrect node locks, owning %s, expected %s" %
9801
           (owned_nodes, self.node_secondary_ip.keys()))
9802

    
9803
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9804
      assert list(owned_instances) == [self.instance_name], \
9805
          "Instance '%s' not locked" % self.instance_name
9806

    
9807
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9808
          "Should not own any node group lock at this point"
9809

    
9810
    if not self.disks:
9811
      feedback_fn("No disks need replacement")
9812
      return
9813

    
9814
    feedback_fn("Replacing disk(s) %s for %s" %
9815
                (utils.CommaJoin(self.disks), self.instance.name))
9816

    
9817
    activate_disks = (not self.instance.admin_up)
9818

    
9819
    # Activate the instance disks if we're replacing them on a down instance
9820
    if activate_disks:
9821
      _StartInstanceDisks(self.lu, self.instance, True)
9822

    
9823
    try:
9824
      # Should we replace the secondary node?
9825
      if self.new_node is not None:
9826
        fn = self._ExecDrbd8Secondary
9827
      else:
9828
        fn = self._ExecDrbd8DiskOnly
9829

    
9830
      result = fn(feedback_fn)
9831
    finally:
9832
      # Deactivate the instance disks if we're replacing them on a
9833
      # down instance
9834
      if activate_disks:
9835
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9836

    
9837
    if __debug__:
9838
      # Verify owned locks
9839
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9840
      nodes = frozenset(self.node_secondary_ip)
9841
      assert ((self.early_release and not owned_nodes) or
9842
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9843
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9844
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9845

    
9846
    return result
9847

    
9848
  def _CheckVolumeGroup(self, nodes):
9849
    self.lu.LogInfo("Checking volume groups")
9850

    
9851
    vgname = self.cfg.GetVGName()
9852

    
9853
    # Make sure volume group exists on all involved nodes
9854
    results = self.rpc.call_vg_list(nodes)
9855
    if not results:
9856
      raise errors.OpExecError("Can't list volume groups on the nodes")
9857

    
9858
    for node in nodes:
9859
      res = results[node]
9860
      res.Raise("Error checking node %s" % node)
9861
      if vgname not in res.payload:
9862
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9863
                                 (vgname, node))
9864

    
9865
  def _CheckDisksExistence(self, nodes):
9866
    # Check disk existence
9867
    for idx, dev in enumerate(self.instance.disks):
9868
      if idx not in self.disks:
9869
        continue
9870

    
9871
      for node in nodes:
9872
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9873
        self.cfg.SetDiskID(dev, node)
9874

    
9875
        result = self.rpc.call_blockdev_find(node, dev)
9876

    
9877
        msg = result.fail_msg
9878
        if msg or not result.payload:
9879
          if not msg:
9880
            msg = "disk not found"
9881
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9882
                                   (idx, node, msg))
9883

    
9884
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9885
    for idx, dev in enumerate(self.instance.disks):
9886
      if idx not in self.disks:
9887
        continue
9888

    
9889
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9890
                      (idx, node_name))
9891

    
9892
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9893
                                   ldisk=ldisk):
9894
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9895
                                 " replace disks for instance %s" %
9896
                                 (node_name, self.instance.name))
9897

    
9898
  def _CreateNewStorage(self, node_name):
9899
    """Create new storage on the primary or secondary node.
9900

9901
    This is only used for same-node replaces, not for changing the
9902
    secondary node, hence we don't want to modify the existing disk.
9903

9904
    """
9905
    iv_names = {}
9906

    
9907
    for idx, dev in enumerate(self.instance.disks):
9908
      if idx not in self.disks:
9909
        continue
9910

    
9911
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9912

    
9913
      self.cfg.SetDiskID(dev, node_name)
9914

    
9915
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9916
      names = _GenerateUniqueNames(self.lu, lv_names)
9917

    
9918
      vg_data = dev.children[0].logical_id[0]
9919
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9920
                             logical_id=(vg_data, names[0]))
9921
      vg_meta = dev.children[1].logical_id[0]
9922
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
9923
                             logical_id=(vg_meta, names[1]))
9924

    
9925
      new_lvs = [lv_data, lv_meta]
9926
      old_lvs = [child.Copy() for child in dev.children]
9927
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9928

    
9929
      # we pass force_create=True to force the LVM creation
9930
      for new_lv in new_lvs:
9931
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9932
                        _GetInstanceInfoText(self.instance), False)
9933

    
9934
    return iv_names
9935

    
9936
  def _CheckDevices(self, node_name, iv_names):
9937
    for name, (dev, _, _) in iv_names.iteritems():
9938
      self.cfg.SetDiskID(dev, node_name)
9939

    
9940
      result = self.rpc.call_blockdev_find(node_name, dev)
9941

    
9942
      msg = result.fail_msg
9943
      if msg or not result.payload:
9944
        if not msg:
9945
          msg = "disk not found"
9946
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9947
                                 (name, msg))
9948

    
9949
      if result.payload.is_degraded:
9950
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9951

    
9952
  def _RemoveOldStorage(self, node_name, iv_names):
9953
    for name, (_, old_lvs, _) in iv_names.iteritems():
9954
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9955

    
9956
      for lv in old_lvs:
9957
        self.cfg.SetDiskID(lv, node_name)
9958

    
9959
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9960
        if msg:
9961
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9962
                             hint="remove unused LVs manually")
9963

    
9964
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9965
    """Replace a disk on the primary or secondary for DRBD 8.
9966

9967
    The algorithm for replace is quite complicated:
9968

9969
      1. for each disk to be replaced:
9970

9971
        1. create new LVs on the target node with unique names
9972
        1. detach old LVs from the drbd device
9973
        1. rename old LVs to name_replaced.<time_t>
9974
        1. rename new LVs to old LVs
9975
        1. attach the new LVs (with the old names now) to the drbd device
9976

9977
      1. wait for sync across all devices
9978

9979
      1. for each modified disk:
9980

9981
        1. remove old LVs (which have the name name_replaces.<time_t>)
9982

9983
    Failures are not very well handled.
9984

9985
    """
9986
    steps_total = 6
9987

    
9988
    # Step: check device activation
9989
    self.lu.LogStep(1, steps_total, "Check device existence")
9990
    self._CheckDisksExistence([self.other_node, self.target_node])
9991
    self._CheckVolumeGroup([self.target_node, self.other_node])
9992

    
9993
    # Step: check other node consistency
9994
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9995
    self._CheckDisksConsistency(self.other_node,
9996
                                self.other_node == self.instance.primary_node,
9997
                                False)
9998

    
9999
    # Step: create new storage
10000
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10001
    iv_names = self._CreateNewStorage(self.target_node)
10002

    
10003
    # Step: for each lv, detach+rename*2+attach
10004
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10005
    for dev, old_lvs, new_lvs in iv_names.itervalues():
10006
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10007

    
10008
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10009
                                                     old_lvs)
10010
      result.Raise("Can't detach drbd from local storage on node"
10011
                   " %s for device %s" % (self.target_node, dev.iv_name))
10012
      #dev.children = []
10013
      #cfg.Update(instance)
10014

    
10015
      # ok, we created the new LVs, so now we know we have the needed
10016
      # storage; as such, we proceed on the target node to rename
10017
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10018
      # using the assumption that logical_id == physical_id (which in
10019
      # turn is the unique_id on that node)
10020

    
10021
      # FIXME(iustin): use a better name for the replaced LVs
10022
      temp_suffix = int(time.time())
10023
      ren_fn = lambda d, suff: (d.physical_id[0],
10024
                                d.physical_id[1] + "_replaced-%s" % suff)
10025

    
10026
      # Build the rename list based on what LVs exist on the node
10027
      rename_old_to_new = []
10028
      for to_ren in old_lvs:
10029
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10030
        if not result.fail_msg and result.payload:
10031
          # device exists
10032
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10033

    
10034
      self.lu.LogInfo("Renaming the old LVs on the target node")
10035
      result = self.rpc.call_blockdev_rename(self.target_node,
10036
                                             rename_old_to_new)
10037
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
10038

    
10039
      # Now we rename the new LVs to the old LVs
10040
      self.lu.LogInfo("Renaming the new LVs on the target node")
10041
      rename_new_to_old = [(new, old.physical_id)
10042
                           for old, new in zip(old_lvs, new_lvs)]
10043
      result = self.rpc.call_blockdev_rename(self.target_node,
10044
                                             rename_new_to_old)
10045
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
10046

    
10047
      # Intermediate steps of in memory modifications
10048
      for old, new in zip(old_lvs, new_lvs):
10049
        new.logical_id = old.logical_id
10050
        self.cfg.SetDiskID(new, self.target_node)
10051

    
10052
      # We need to modify old_lvs so that removal later removes the
10053
      # right LVs, not the newly added ones; note that old_lvs is a
10054
      # copy here
10055
      for disk in old_lvs:
10056
        disk.logical_id = ren_fn(disk, temp_suffix)
10057
        self.cfg.SetDiskID(disk, self.target_node)
10058

    
10059
      # Now that the new lvs have the old name, we can add them to the device
10060
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10061
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10062
                                                  new_lvs)
10063
      msg = result.fail_msg
10064
      if msg:
10065
        for new_lv in new_lvs:
10066
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
10067
                                               new_lv).fail_msg
10068
          if msg2:
10069
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10070
                               hint=("cleanup manually the unused logical"
10071
                                     "volumes"))
10072
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10073

    
10074
    cstep = 5
10075
    if self.early_release:
10076
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10077
      cstep += 1
10078
      self._RemoveOldStorage(self.target_node, iv_names)
10079
      # WARNING: we release both node locks here, do not do other RPCs
10080
      # than WaitForSync to the primary node
10081
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10082
                    names=[self.target_node, self.other_node])
10083

    
10084
    # Wait for sync
10085
    # This can fail as the old devices are degraded and _WaitForSync
10086
    # does a combined result over all disks, so we don't check its return value
10087
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10088
    cstep += 1
10089
    _WaitForSync(self.lu, self.instance)
10090

    
10091
    # Check all devices manually
10092
    self._CheckDevices(self.instance.primary_node, iv_names)
10093

    
10094
    # Step: remove old storage
10095
    if not self.early_release:
10096
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10097
      cstep += 1
10098
      self._RemoveOldStorage(self.target_node, iv_names)
10099

    
10100
  def _ExecDrbd8Secondary(self, feedback_fn):
10101
    """Replace the secondary node for DRBD 8.
10102

10103
    The algorithm for replace is quite complicated:
10104
      - for all disks of the instance:
10105
        - create new LVs on the new node with same names
10106
        - shutdown the drbd device on the old secondary
10107
        - disconnect the drbd network on the primary
10108
        - create the drbd device on the new secondary
10109
        - network attach the drbd on the primary, using an artifice:
10110
          the drbd code for Attach() will connect to the network if it
10111
          finds a device which is connected to the good local disks but
10112
          not network enabled
10113
      - wait for sync across all devices
10114
      - remove all disks from the old secondary
10115

10116
    Failures are not very well handled.
10117

10118
    """
10119
    steps_total = 6
10120

    
10121
    pnode = self.instance.primary_node
10122

    
10123
    # Step: check device activation
10124
    self.lu.LogStep(1, steps_total, "Check device existence")
10125
    self._CheckDisksExistence([self.instance.primary_node])
10126
    self._CheckVolumeGroup([self.instance.primary_node])
10127

    
10128
    # Step: check other node consistency
10129
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10130
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
10131

    
10132
    # Step: create new storage
10133
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10134
    for idx, dev in enumerate(self.instance.disks):
10135
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10136
                      (self.new_node, idx))
10137
      # we pass force_create=True to force LVM creation
10138
      for new_lv in dev.children:
10139
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10140
                        _GetInstanceInfoText(self.instance), False)
10141

    
10142
    # Step 4: dbrd minors and drbd setups changes
10143
    # after this, we must manually remove the drbd minors on both the
10144
    # error and the success paths
10145
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10146
    minors = self.cfg.AllocateDRBDMinor([self.new_node
10147
                                         for dev in self.instance.disks],
10148
                                        self.instance.name)
10149
    logging.debug("Allocated minors %r", minors)
10150

    
10151
    iv_names = {}
10152
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10153
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10154
                      (self.new_node, idx))
10155
      # create new devices on new_node; note that we create two IDs:
10156
      # one without port, so the drbd will be activated without
10157
      # networking information on the new node at this stage, and one
10158
      # with network, for the latter activation in step 4
10159
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10160
      if self.instance.primary_node == o_node1:
10161
        p_minor = o_minor1
10162
      else:
10163
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10164
        p_minor = o_minor2
10165

    
10166
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10167
                      p_minor, new_minor, o_secret)
10168
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10169
                    p_minor, new_minor, o_secret)
10170

    
10171
      iv_names[idx] = (dev, dev.children, new_net_id)
10172
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10173
                    new_net_id)
10174
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10175
                              logical_id=new_alone_id,
10176
                              children=dev.children,
10177
                              size=dev.size)
10178
      try:
10179
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10180
                              _GetInstanceInfoText(self.instance), False)
10181
      except errors.GenericError:
10182
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10183
        raise
10184

    
10185
    # We have new devices, shutdown the drbd on the old secondary
10186
    for idx, dev in enumerate(self.instance.disks):
10187
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10188
      self.cfg.SetDiskID(dev, self.target_node)
10189
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10190
      if msg:
10191
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10192
                           "node: %s" % (idx, msg),
10193
                           hint=("Please cleanup this device manually as"
10194
                                 " soon as possible"))
10195

    
10196
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10197
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10198
                                               self.instance.disks)[pnode]
10199

    
10200
    msg = result.fail_msg
10201
    if msg:
10202
      # detaches didn't succeed (unlikely)
10203
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10204
      raise errors.OpExecError("Can't detach the disks from the network on"
10205
                               " old node: %s" % (msg,))
10206

    
10207
    # if we managed to detach at least one, we update all the disks of
10208
    # the instance to point to the new secondary
10209
    self.lu.LogInfo("Updating instance configuration")
10210
    for dev, _, new_logical_id in iv_names.itervalues():
10211
      dev.logical_id = new_logical_id
10212
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10213

    
10214
    self.cfg.Update(self.instance, feedback_fn)
10215

    
10216
    # and now perform the drbd attach
10217
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10218
                    " (standalone => connected)")
10219
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10220
                                            self.new_node],
10221
                                           self.node_secondary_ip,
10222
                                           self.instance.disks,
10223
                                           self.instance.name,
10224
                                           False)
10225
    for to_node, to_result in result.items():
10226
      msg = to_result.fail_msg
10227
      if msg:
10228
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10229
                           to_node, msg,
10230
                           hint=("please do a gnt-instance info to see the"
10231
                                 " status of disks"))
10232
    cstep = 5
10233
    if self.early_release:
10234
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10235
      cstep += 1
10236
      self._RemoveOldStorage(self.target_node, iv_names)
10237
      # WARNING: we release all node locks here, do not do other RPCs
10238
      # than WaitForSync to the primary node
10239
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10240
                    names=[self.instance.primary_node,
10241
                           self.target_node,
10242
                           self.new_node])
10243

    
10244
    # Wait for sync
10245
    # This can fail as the old devices are degraded and _WaitForSync
10246
    # does a combined result over all disks, so we don't check its return value
10247
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10248
    cstep += 1
10249
    _WaitForSync(self.lu, self.instance)
10250

    
10251
    # Check all devices manually
10252
    self._CheckDevices(self.instance.primary_node, iv_names)
10253

    
10254
    # Step: remove old storage
10255
    if not self.early_release:
10256
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10257
      self._RemoveOldStorage(self.target_node, iv_names)
10258

    
10259

    
10260
class LURepairNodeStorage(NoHooksLU):
10261
  """Repairs the volume group on a node.
10262

10263
  """
10264
  REQ_BGL = False
10265

    
10266
  def CheckArguments(self):
10267
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10268

    
10269
    storage_type = self.op.storage_type
10270

    
10271
    if (constants.SO_FIX_CONSISTENCY not in
10272
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10273
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10274
                                 " repaired" % storage_type,
10275
                                 errors.ECODE_INVAL)
10276

    
10277
  def ExpandNames(self):
10278
    self.needed_locks = {
10279
      locking.LEVEL_NODE: [self.op.node_name],
10280
      }
10281

    
10282
  def _CheckFaultyDisks(self, instance, node_name):
10283
    """Ensure faulty disks abort the opcode or at least warn."""
10284
    try:
10285
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10286
                                  node_name, True):
10287
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10288
                                   " node '%s'" % (instance.name, node_name),
10289
                                   errors.ECODE_STATE)
10290
    except errors.OpPrereqError, err:
10291
      if self.op.ignore_consistency:
10292
        self.proc.LogWarning(str(err.args[0]))
10293
      else:
10294
        raise
10295

    
10296
  def CheckPrereq(self):
10297
    """Check prerequisites.
10298

10299
    """
10300
    # Check whether any instance on this node has faulty disks
10301
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10302
      if not inst.admin_up:
10303
        continue
10304
      check_nodes = set(inst.all_nodes)
10305
      check_nodes.discard(self.op.node_name)
10306
      for inst_node_name in check_nodes:
10307
        self._CheckFaultyDisks(inst, inst_node_name)
10308

    
10309
  def Exec(self, feedback_fn):
10310
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10311
                (self.op.name, self.op.node_name))
10312

    
10313
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10314
    result = self.rpc.call_storage_execute(self.op.node_name,
10315
                                           self.op.storage_type, st_args,
10316
                                           self.op.name,
10317
                                           constants.SO_FIX_CONSISTENCY)
10318
    result.Raise("Failed to repair storage unit '%s' on %s" %
10319
                 (self.op.name, self.op.node_name))
10320

    
10321

    
10322
class LUNodeEvacuate(NoHooksLU):
10323
  """Evacuates instances off a list of nodes.
10324

10325
  """
10326
  REQ_BGL = False
10327

    
10328
  def CheckArguments(self):
10329
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10330

    
10331
  def ExpandNames(self):
10332
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10333

    
10334
    if self.op.remote_node is not None:
10335
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10336
      assert self.op.remote_node
10337

    
10338
      if self.op.remote_node == self.op.node_name:
10339
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10340
                                   " secondary node", errors.ECODE_INVAL)
10341

    
10342
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10343
        raise errors.OpPrereqError("Without the use of an iallocator only"
10344
                                   " secondary instances can be evacuated",
10345
                                   errors.ECODE_INVAL)
10346

    
10347
    # Declare locks
10348
    self.share_locks = _ShareAll()
10349
    self.needed_locks = {
10350
      locking.LEVEL_INSTANCE: [],
10351
      locking.LEVEL_NODEGROUP: [],
10352
      locking.LEVEL_NODE: [],
10353
      }
10354

    
10355
    if self.op.remote_node is None:
10356
      # Iallocator will choose any node(s) in the same group
10357
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10358
    else:
10359
      group_nodes = frozenset([self.op.remote_node])
10360

    
10361
    # Determine nodes to be locked
10362
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10363

    
10364
  def _DetermineInstances(self):
10365
    """Builds list of instances to operate on.
10366

10367
    """
10368
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10369

    
10370
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10371
      # Primary instances only
10372
      inst_fn = _GetNodePrimaryInstances
10373
      assert self.op.remote_node is None, \
10374
        "Evacuating primary instances requires iallocator"
10375
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10376
      # Secondary instances only
10377
      inst_fn = _GetNodeSecondaryInstances
10378
    else:
10379
      # All instances
10380
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10381
      inst_fn = _GetNodeInstances
10382

    
10383
    return inst_fn(self.cfg, self.op.node_name)
10384

    
10385
  def DeclareLocks(self, level):
10386
    if level == locking.LEVEL_INSTANCE:
10387
      # Lock instances optimistically, needs verification once node and group
10388
      # locks have been acquired
10389
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10390
        set(i.name for i in self._DetermineInstances())
10391

    
10392
    elif level == locking.LEVEL_NODEGROUP:
10393
      # Lock node groups optimistically, needs verification once nodes have
10394
      # been acquired
10395
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10396
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10397

    
10398
    elif level == locking.LEVEL_NODE:
10399
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10400

    
10401
  def CheckPrereq(self):
10402
    # Verify locks
10403
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10404
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10405
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10406

    
10407
    assert owned_nodes == self.lock_nodes
10408

    
10409
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10410
    if owned_groups != wanted_groups:
10411
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10412
                               " current groups are '%s', used to be '%s'" %
10413
                               (utils.CommaJoin(wanted_groups),
10414
                                utils.CommaJoin(owned_groups)))
10415

    
10416
    # Determine affected instances
10417
    self.instances = self._DetermineInstances()
10418
    self.instance_names = [i.name for i in self.instances]
10419

    
10420
    if set(self.instance_names) != owned_instances:
10421
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10422
                               " were acquired, current instances are '%s',"
10423
                               " used to be '%s'" %
10424
                               (self.op.node_name,
10425
                                utils.CommaJoin(self.instance_names),
10426
                                utils.CommaJoin(owned_instances)))
10427

    
10428
    if self.instance_names:
10429
      self.LogInfo("Evacuating instances from node '%s': %s",
10430
                   self.op.node_name,
10431
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10432
    else:
10433
      self.LogInfo("No instances to evacuate from node '%s'",
10434
                   self.op.node_name)
10435

    
10436
    if self.op.remote_node is not None:
10437
      for i in self.instances:
10438
        if i.primary_node == self.op.remote_node:
10439
          raise errors.OpPrereqError("Node %s is the primary node of"
10440
                                     " instance %s, cannot use it as"
10441
                                     " secondary" %
10442
                                     (self.op.remote_node, i.name),
10443
                                     errors.ECODE_INVAL)
10444

    
10445
  def Exec(self, feedback_fn):
10446
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10447

    
10448
    if not self.instance_names:
10449
      # No instances to evacuate
10450
      jobs = []
10451

    
10452
    elif self.op.iallocator is not None:
10453
      # TODO: Implement relocation to other group
10454
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10455
                       evac_mode=self.op.mode,
10456
                       instances=list(self.instance_names))
10457

    
10458
      ial.Run(self.op.iallocator)
10459

    
10460
      if not ial.success:
10461
        raise errors.OpPrereqError("Can't compute node evacuation using"
10462
                                   " iallocator '%s': %s" %
10463
                                   (self.op.iallocator, ial.info),
10464
                                   errors.ECODE_NORES)
10465

    
10466
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10467

    
10468
    elif self.op.remote_node is not None:
10469
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10470
      jobs = [
10471
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10472
                                        remote_node=self.op.remote_node,
10473
                                        disks=[],
10474
                                        mode=constants.REPLACE_DISK_CHG,
10475
                                        early_release=self.op.early_release)]
10476
        for instance_name in self.instance_names
10477
        ]
10478

    
10479
    else:
10480
      raise errors.ProgrammerError("No iallocator or remote node")
10481

    
10482
    return ResultWithJobs(jobs)
10483

    
10484

    
10485
def _SetOpEarlyRelease(early_release, op):
10486
  """Sets C{early_release} flag on opcodes if available.
10487

10488
  """
10489
  try:
10490
    op.early_release = early_release
10491
  except AttributeError:
10492
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10493

    
10494
  return op
10495

    
10496

    
10497
def _NodeEvacDest(use_nodes, group, nodes):
10498
  """Returns group or nodes depending on caller's choice.
10499

10500
  """
10501
  if use_nodes:
10502
    return utils.CommaJoin(nodes)
10503
  else:
10504
    return group
10505

    
10506

    
10507
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10508
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10509

10510
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10511
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10512

10513
  @type lu: L{LogicalUnit}
10514
  @param lu: Logical unit instance
10515
  @type alloc_result: tuple/list
10516
  @param alloc_result: Result from iallocator
10517
  @type early_release: bool
10518
  @param early_release: Whether to release locks early if possible
10519
  @type use_nodes: bool
10520
  @param use_nodes: Whether to display node names instead of groups
10521

10522
  """
10523
  (moved, failed, jobs) = alloc_result
10524

    
10525
  if failed:
10526
    lu.LogWarning("Unable to evacuate instances %s",
10527
                  utils.CommaJoin("%s (%s)" % (name, reason)
10528
                                  for (name, reason) in failed))
10529

    
10530
  if moved:
10531
    lu.LogInfo("Instances to be moved: %s",
10532
               utils.CommaJoin("%s (to %s)" %
10533
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10534
                               for (name, group, nodes) in moved))
10535

    
10536
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10537
              map(opcodes.OpCode.LoadOpCode, ops))
10538
          for ops in jobs]
10539

    
10540

    
10541
class LUInstanceGrowDisk(LogicalUnit):
10542
  """Grow a disk of an instance.
10543

10544
  """
10545
  HPATH = "disk-grow"
10546
  HTYPE = constants.HTYPE_INSTANCE
10547
  REQ_BGL = False
10548

    
10549
  def ExpandNames(self):
10550
    self._ExpandAndLockInstance()
10551
    self.needed_locks[locking.LEVEL_NODE] = []
10552
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10553

    
10554
  def DeclareLocks(self, level):
10555
    if level == locking.LEVEL_NODE:
10556
      self._LockInstancesNodes()
10557

    
10558
  def BuildHooksEnv(self):
10559
    """Build hooks env.
10560

10561
    This runs on the master, the primary and all the secondaries.
10562

10563
    """
10564
    env = {
10565
      "DISK": self.op.disk,
10566
      "AMOUNT": self.op.amount,
10567
      }
10568
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10569
    return env
10570

    
10571
  def BuildHooksNodes(self):
10572
    """Build hooks nodes.
10573

10574
    """
10575
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10576
    return (nl, nl)
10577

    
10578
  def CheckPrereq(self):
10579
    """Check prerequisites.
10580

10581
    This checks that the instance is in the cluster.
10582

10583
    """
10584
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10585
    assert instance is not None, \
10586
      "Cannot retrieve locked instance %s" % self.op.instance_name
10587
    nodenames = list(instance.all_nodes)
10588
    for node in nodenames:
10589
      _CheckNodeOnline(self, node)
10590

    
10591
    self.instance = instance
10592

    
10593
    if instance.disk_template not in constants.DTS_GROWABLE:
10594
      raise errors.OpPrereqError("Instance's disk layout does not support"
10595
                                 " growing", errors.ECODE_INVAL)
10596

    
10597
    self.disk = instance.FindDisk(self.op.disk)
10598

    
10599
    if instance.disk_template not in (constants.DT_FILE,
10600
                                      constants.DT_SHARED_FILE):
10601
      # TODO: check the free disk space for file, when that feature will be
10602
      # supported
10603
      _CheckNodesFreeDiskPerVG(self, nodenames,
10604
                               self.disk.ComputeGrowth(self.op.amount))
10605

    
10606
  def Exec(self, feedback_fn):
10607
    """Execute disk grow.
10608

10609
    """
10610
    instance = self.instance
10611
    disk = self.disk
10612

    
10613
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10614
    if not disks_ok:
10615
      raise errors.OpExecError("Cannot activate block device to grow")
10616

    
10617
    # First run all grow ops in dry-run mode
10618
    for node in instance.all_nodes:
10619
      self.cfg.SetDiskID(disk, node)
10620
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10621
      result.Raise("Grow request failed to node %s" % node)
10622

    
10623
    # We know that (as far as we can test) operations across different
10624
    # nodes will succeed, time to run it for real
10625
    for node in instance.all_nodes:
10626
      self.cfg.SetDiskID(disk, node)
10627
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10628
      result.Raise("Grow request failed to node %s" % node)
10629

    
10630
      # TODO: Rewrite code to work properly
10631
      # DRBD goes into sync mode for a short amount of time after executing the
10632
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10633
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10634
      # time is a work-around.
10635
      time.sleep(5)
10636

    
10637
    disk.RecordGrow(self.op.amount)
10638
    self.cfg.Update(instance, feedback_fn)
10639
    if self.op.wait_for_sync:
10640
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10641
      if disk_abort:
10642
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10643
                             " status; please check the instance")
10644
      if not instance.admin_up:
10645
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10646
    elif not instance.admin_up:
10647
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10648
                           " not supposed to be running because no wait for"
10649
                           " sync mode was requested")
10650

    
10651

    
10652
class LUInstanceQueryData(NoHooksLU):
10653
  """Query runtime instance data.
10654

10655
  """
10656
  REQ_BGL = False
10657

    
10658
  def ExpandNames(self):
10659
    self.needed_locks = {}
10660

    
10661
    # Use locking if requested or when non-static information is wanted
10662
    if not (self.op.static or self.op.use_locking):
10663
      self.LogWarning("Non-static data requested, locks need to be acquired")
10664
      self.op.use_locking = True
10665

    
10666
    if self.op.instances or not self.op.use_locking:
10667
      # Expand instance names right here
10668
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10669
    else:
10670
      # Will use acquired locks
10671
      self.wanted_names = None
10672

    
10673
    if self.op.use_locking:
10674
      self.share_locks = _ShareAll()
10675

    
10676
      if self.wanted_names is None:
10677
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10678
      else:
10679
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10680

    
10681
      self.needed_locks[locking.LEVEL_NODE] = []
10682
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10683

    
10684
  def DeclareLocks(self, level):
10685
    if self.op.use_locking and level == locking.LEVEL_NODE:
10686
      self._LockInstancesNodes()
10687

    
10688
  def CheckPrereq(self):
10689
    """Check prerequisites.
10690

10691
    This only checks the optional instance list against the existing names.
10692

10693
    """
10694
    if self.wanted_names is None:
10695
      assert self.op.use_locking, "Locking was not used"
10696
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10697

    
10698
    self.wanted_instances = \
10699
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10700

    
10701
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10702
    """Returns the status of a block device
10703

10704
    """
10705
    if self.op.static or not node:
10706
      return None
10707

    
10708
    self.cfg.SetDiskID(dev, node)
10709

    
10710
    result = self.rpc.call_blockdev_find(node, dev)
10711
    if result.offline:
10712
      return None
10713

    
10714
    result.Raise("Can't compute disk status for %s" % instance_name)
10715

    
10716
    status = result.payload
10717
    if status is None:
10718
      return None
10719

    
10720
    return (status.dev_path, status.major, status.minor,
10721
            status.sync_percent, status.estimated_time,
10722
            status.is_degraded, status.ldisk_status)
10723

    
10724
  def _ComputeDiskStatus(self, instance, snode, dev):
10725
    """Compute block device status.
10726

10727
    """
10728
    if dev.dev_type in constants.LDS_DRBD:
10729
      # we change the snode then (otherwise we use the one passed in)
10730
      if dev.logical_id[0] == instance.primary_node:
10731
        snode = dev.logical_id[1]
10732
      else:
10733
        snode = dev.logical_id[0]
10734

    
10735
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10736
                                              instance.name, dev)
10737
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10738

    
10739
    if dev.children:
10740
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10741
                                        instance, snode),
10742
                         dev.children)
10743
    else:
10744
      dev_children = []
10745

    
10746
    return {
10747
      "iv_name": dev.iv_name,
10748
      "dev_type": dev.dev_type,
10749
      "logical_id": dev.logical_id,
10750
      "physical_id": dev.physical_id,
10751
      "pstatus": dev_pstatus,
10752
      "sstatus": dev_sstatus,
10753
      "children": dev_children,
10754
      "mode": dev.mode,
10755
      "size": dev.size,
10756
      }
10757

    
10758
  def Exec(self, feedback_fn):
10759
    """Gather and return data"""
10760
    result = {}
10761

    
10762
    cluster = self.cfg.GetClusterInfo()
10763

    
10764
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10765
                                          for i in self.wanted_instances)
10766
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10767
      if self.op.static or pnode.offline:
10768
        remote_state = None
10769
        if pnode.offline:
10770
          self.LogWarning("Primary node %s is marked offline, returning static"
10771
                          " information only for instance %s" %
10772
                          (pnode.name, instance.name))
10773
      else:
10774
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10775
                                                  instance.name,
10776
                                                  instance.hypervisor)
10777
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10778
        remote_info = remote_info.payload
10779
        if remote_info and "state" in remote_info:
10780
          remote_state = "up"
10781
        else:
10782
          remote_state = "down"
10783

    
10784
      if instance.admin_up:
10785
        config_state = "up"
10786
      else:
10787
        config_state = "down"
10788

    
10789
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10790
                  instance.disks)
10791

    
10792
      result[instance.name] = {
10793
        "name": instance.name,
10794
        "config_state": config_state,
10795
        "run_state": remote_state,
10796
        "pnode": instance.primary_node,
10797
        "snodes": instance.secondary_nodes,
10798
        "os": instance.os,
10799
        # this happens to be the same format used for hooks
10800
        "nics": _NICListToTuple(self, instance.nics),
10801
        "disk_template": instance.disk_template,
10802
        "disks": disks,
10803
        "hypervisor": instance.hypervisor,
10804
        "network_port": instance.network_port,
10805
        "hv_instance": instance.hvparams,
10806
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10807
        "be_instance": instance.beparams,
10808
        "be_actual": cluster.FillBE(instance),
10809
        "os_instance": instance.osparams,
10810
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10811
        "serial_no": instance.serial_no,
10812
        "mtime": instance.mtime,
10813
        "ctime": instance.ctime,
10814
        "uuid": instance.uuid,
10815
        }
10816

    
10817
    return result
10818

    
10819

    
10820
class LUInstanceSetParams(LogicalUnit):
10821
  """Modifies an instances's parameters.
10822

10823
  """
10824
  HPATH = "instance-modify"
10825
  HTYPE = constants.HTYPE_INSTANCE
10826
  REQ_BGL = False
10827

    
10828
  def CheckArguments(self):
10829
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10830
            self.op.hvparams or self.op.beparams or self.op.os_name):
10831
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10832

    
10833
    if self.op.hvparams:
10834
      _CheckGlobalHvParams(self.op.hvparams)
10835

    
10836
    # Disk validation
10837
    disk_addremove = 0
10838
    for disk_op, disk_dict in self.op.disks:
10839
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10840
      if disk_op == constants.DDM_REMOVE:
10841
        disk_addremove += 1
10842
        continue
10843
      elif disk_op == constants.DDM_ADD:
10844
        disk_addremove += 1
10845
      else:
10846
        if not isinstance(disk_op, int):
10847
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10848
        if not isinstance(disk_dict, dict):
10849
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10850
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10851

    
10852
      if disk_op == constants.DDM_ADD:
10853
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10854
        if mode not in constants.DISK_ACCESS_SET:
10855
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10856
                                     errors.ECODE_INVAL)
10857
        size = disk_dict.get(constants.IDISK_SIZE, None)
10858
        if size is None:
10859
          raise errors.OpPrereqError("Required disk parameter size missing",
10860
                                     errors.ECODE_INVAL)
10861
        try:
10862
          size = int(size)
10863
        except (TypeError, ValueError), err:
10864
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10865
                                     str(err), errors.ECODE_INVAL)
10866
        disk_dict[constants.IDISK_SIZE] = size
10867
      else:
10868
        # modification of disk
10869
        if constants.IDISK_SIZE in disk_dict:
10870
          raise errors.OpPrereqError("Disk size change not possible, use"
10871
                                     " grow-disk", errors.ECODE_INVAL)
10872

    
10873
    if disk_addremove > 1:
10874
      raise errors.OpPrereqError("Only one disk add or remove operation"
10875
                                 " supported at a time", errors.ECODE_INVAL)
10876

    
10877
    if self.op.disks and self.op.disk_template is not None:
10878
      raise errors.OpPrereqError("Disk template conversion and other disk"
10879
                                 " changes not supported at the same time",
10880
                                 errors.ECODE_INVAL)
10881

    
10882
    if (self.op.disk_template and
10883
        self.op.disk_template in constants.DTS_INT_MIRROR and
10884
        self.op.remote_node is None):
10885
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10886
                                 " one requires specifying a secondary node",
10887
                                 errors.ECODE_INVAL)
10888

    
10889
    # NIC validation
10890
    nic_addremove = 0
10891
    for nic_op, nic_dict in self.op.nics:
10892
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10893
      if nic_op == constants.DDM_REMOVE:
10894
        nic_addremove += 1
10895
        continue
10896
      elif nic_op == constants.DDM_ADD:
10897
        nic_addremove += 1
10898
      else:
10899
        if not isinstance(nic_op, int):
10900
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10901
        if not isinstance(nic_dict, dict):
10902
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10903
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10904

    
10905
      # nic_dict should be a dict
10906
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10907
      if nic_ip is not None:
10908
        if nic_ip.lower() == constants.VALUE_NONE:
10909
          nic_dict[constants.INIC_IP] = None
10910
        else:
10911
          if not netutils.IPAddress.IsValid(nic_ip):
10912
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10913
                                       errors.ECODE_INVAL)
10914

    
10915
      nic_bridge = nic_dict.get("bridge", None)
10916
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10917
      if nic_bridge and nic_link:
10918
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10919
                                   " at the same time", errors.ECODE_INVAL)
10920
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10921
        nic_dict["bridge"] = None
10922
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10923
        nic_dict[constants.INIC_LINK] = None
10924

    
10925
      if nic_op == constants.DDM_ADD:
10926
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10927
        if nic_mac is None:
10928
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10929

    
10930
      if constants.INIC_MAC in nic_dict:
10931
        nic_mac = nic_dict[constants.INIC_MAC]
10932
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10933
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10934

    
10935
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10936
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10937
                                     " modifying an existing nic",
10938
                                     errors.ECODE_INVAL)
10939

    
10940
    if nic_addremove > 1:
10941
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10942
                                 " supported at a time", errors.ECODE_INVAL)
10943

    
10944
  def ExpandNames(self):
10945
    self._ExpandAndLockInstance()
10946
    self.needed_locks[locking.LEVEL_NODE] = []
10947
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10948

    
10949
  def DeclareLocks(self, level):
10950
    if level == locking.LEVEL_NODE:
10951
      self._LockInstancesNodes()
10952
      if self.op.disk_template and self.op.remote_node:
10953
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10954
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10955

    
10956
  def BuildHooksEnv(self):
10957
    """Build hooks env.
10958

10959
    This runs on the master, primary and secondaries.
10960

10961
    """
10962
    args = dict()
10963
    if constants.BE_MEMORY in self.be_new:
10964
      args["memory"] = self.be_new[constants.BE_MEMORY]
10965
    if constants.BE_VCPUS in self.be_new:
10966
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10967
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10968
    # information at all.
10969
    if self.op.nics:
10970
      args["nics"] = []
10971
      nic_override = dict(self.op.nics)
10972
      for idx, nic in enumerate(self.instance.nics):
10973
        if idx in nic_override:
10974
          this_nic_override = nic_override[idx]
10975
        else:
10976
          this_nic_override = {}
10977
        if constants.INIC_IP in this_nic_override:
10978
          ip = this_nic_override[constants.INIC_IP]
10979
        else:
10980
          ip = nic.ip
10981
        if constants.INIC_MAC in this_nic_override:
10982
          mac = this_nic_override[constants.INIC_MAC]
10983
        else:
10984
          mac = nic.mac
10985
        if idx in self.nic_pnew:
10986
          nicparams = self.nic_pnew[idx]
10987
        else:
10988
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10989
        mode = nicparams[constants.NIC_MODE]
10990
        link = nicparams[constants.NIC_LINK]
10991
        args["nics"].append((ip, mac, mode, link))
10992
      if constants.DDM_ADD in nic_override:
10993
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10994
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10995
        nicparams = self.nic_pnew[constants.DDM_ADD]
10996
        mode = nicparams[constants.NIC_MODE]
10997
        link = nicparams[constants.NIC_LINK]
10998
        args["nics"].append((ip, mac, mode, link))
10999
      elif constants.DDM_REMOVE in nic_override:
11000
        del args["nics"][-1]
11001

    
11002
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11003
    if self.op.disk_template:
11004
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11005

    
11006
    return env
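    # Illustrative sketch (made-up values): every entry appended to
    # args["nics"] above is an (ip, mac, mode, link) tuple, for example
    # ("198.51.100.10", "aa:00:00:35:4e:01", constants.NIC_MODE_BRIDGED,
    # "br0").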
11007

    
11008
  def BuildHooksNodes(self):
11009
    """Build hooks nodes.
11010

11011
    """
11012
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11013
    return (nl, nl)
11014

    
11015
  def CheckPrereq(self):
11016
    """Check prerequisites.
11017

11018
    This only checks the instance list against the existing names.
11019

11020
    """
11021
    # checking the new params on the primary/secondary nodes
11022

    
11023
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11024
    cluster = self.cluster = self.cfg.GetClusterInfo()
11025
    assert self.instance is not None, \
11026
      "Cannot retrieve locked instance %s" % self.op.instance_name
11027
    pnode = instance.primary_node
11028
    nodelist = list(instance.all_nodes)
11029

    
11030
    # OS change
11031
    if self.op.os_name and not self.op.force:
11032
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11033
                      self.op.force_variant)
11034
      instance_os = self.op.os_name
11035
    else:
11036
      instance_os = instance.os
11037

    
11038
    if self.op.disk_template:
11039
      if instance.disk_template == self.op.disk_template:
11040
        raise errors.OpPrereqError("Instance already has disk template %s" %
11041
                                   instance.disk_template, errors.ECODE_INVAL)
11042

    
11043
      if (instance.disk_template,
11044
          self.op.disk_template) not in self._DISK_CONVERSIONS:
11045
        raise errors.OpPrereqError("Unsupported disk template conversion from"
11046
                                   " %s to %s" % (instance.disk_template,
11047
                                                  self.op.disk_template),
11048
                                   errors.ECODE_INVAL)
11049
      _CheckInstanceDown(self, instance, "cannot change disk template")
11050
      if self.op.disk_template in constants.DTS_INT_MIRROR:
11051
        if self.op.remote_node == pnode:
11052
          raise errors.OpPrereqError("Given new secondary node %s is the same"
11053
                                     " as the primary node of the instance" %
11054
                                     self.op.remote_node, errors.ECODE_STATE)
11055
        _CheckNodeOnline(self, self.op.remote_node)
11056
        _CheckNodeNotDrained(self, self.op.remote_node)
11057
        # FIXME: here we assume that the old instance type is DT_PLAIN
11058
        assert instance.disk_template == constants.DT_PLAIN
11059
        disks = [{constants.IDISK_SIZE: d.size,
11060
                  constants.IDISK_VG: d.logical_id[0]}
11061
                 for d in instance.disks]
11062
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11063
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11064

    
11065
    # hvparams processing
11066
    if self.op.hvparams:
11067
      hv_type = instance.hypervisor
11068
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11069
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11070
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11071

    
11072
      # local check
11073
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11074
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11075
      self.hv_proposed = self.hv_new = hv_new # the new actual values
11076
      self.hv_inst = i_hvdict # the new dict (without defaults)
11077
    else:
11078
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11079
                                              instance.hvparams)
11080
      self.hv_new = self.hv_inst = {}
11081

    
11082
    # beparams processing
11083
    if self.op.beparams:
11084
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11085
                                   use_none=True)
11086
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11087
      be_new = cluster.SimpleFillBE(i_bedict)
11088
      self.be_proposed = self.be_new = be_new # the new actual values
11089
      self.be_inst = i_bedict # the new dict (without defaults)
11090
    else:
11091
      self.be_new = self.be_inst = {}
11092
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11093
    be_old = cluster.FillBE(instance)
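    # Note for clarity: self.be_inst / self.hv_inst hold the instance-level
    # parameters (after the requested updates) without cluster defaults
    # filled in, while self.be_proposed / self.hv_proposed and be_old are the
    # fully filled dictionaries used for validation below.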
11094

    
11095
    # CPU param validation -- checking every time a parameter is
11096
    # changed to cover all cases where either CPU mask or vcpus have
11097
    # changed
11098
    if (constants.BE_VCPUS in self.be_proposed and
11099
        constants.HV_CPU_MASK in self.hv_proposed):
11100
      cpu_list = \
11101
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11102
      # Verify mask is consistent with number of vCPUs. Can skip this
11103
      # test if only 1 entry in the CPU mask, which means same mask
11104
      # is applied to all vCPUs.
11105
      if (len(cpu_list) > 1 and
11106
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11107
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11108
                                   " CPU mask [%s]" %
11109
                                   (self.be_proposed[constants.BE_VCPUS],
11110
                                    self.hv_proposed[constants.HV_CPU_MASK]),
11111
                                   errors.ECODE_INVAL)
11112

    
11113
      # Only perform this test if a new CPU mask is given
11114
      if constants.HV_CPU_MASK in self.hv_new:
11115
        # Calculate the largest CPU number requested
11116
        max_requested_cpu = max(map(max, cpu_list))
11117
        # Check that all of the instance's nodes have enough physical CPUs to
11118
        # satisfy the requested CPU mask
11119
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11120
                                max_requested_cpu + 1, instance.hypervisor)
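      # Worked example (illustrative, assuming the usual multi-CPU mask
      # syntax): a mask such as "0-1:2:3" parses into three per-vCPU entries
      # [[0, 1], [2], [3]], so it is only accepted with BE_VCPUS == 3, and the
      # highest CPU referenced (3) means each node needs at least 4 physical
      # CPUs.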
11121

    
11122
    # osparams processing
11123
    if self.op.osparams:
11124
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11125
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11126
      self.os_inst = i_osdict # the new dict (without defaults)
11127
    else:
11128
      self.os_inst = {}
11129

    
11130
    self.warn = []
11131

    
11132
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11133
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11134
      mem_check_list = [pnode]
11135
      if be_new[constants.BE_AUTO_BALANCE]:
11136
        # either we changed auto_balance to yes or it was from before
11137
        mem_check_list.extend(instance.secondary_nodes)
11138
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11139
                                                  instance.hypervisor)
11140
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11141
                                         instance.hypervisor)
11142
      pninfo = nodeinfo[pnode]
11143
      msg = pninfo.fail_msg
11144
      if msg:
11145
        # Assume the primary node is unreachable and go ahead
11146
        self.warn.append("Can't get info from primary node %s: %s" %
11147
                         (pnode, msg))
11148
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
11149
        self.warn.append("Node data from primary node %s doesn't contain"
11150
                         " free memory information" % pnode)
11151
      elif instance_info.fail_msg:
11152
        self.warn.append("Can't get instance runtime information: %s" %
11153
                        instance_info.fail_msg)
11154
      else:
11155
        if instance_info.payload:
11156
          current_mem = int(instance_info.payload["memory"])
11157
        else:
11158
          # Assume instance not running
11159
          # (there is a slight race condition here, but it's not very probable,
11160
          # and we have no other way to check)
11161
          current_mem = 0
11162
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11163
                    pninfo.payload["memory_free"])
11164
        if miss_mem > 0:
11165
          raise errors.OpPrereqError("This change will prevent the instance"
11166
                                     " from starting, due to %d MB of memory"
11167
                                     " missing on its primary node" % miss_mem,
11168
                                     errors.ECODE_NORES)
11169

    
11170
      if be_new[constants.BE_AUTO_BALANCE]:
11171
        for node, nres in nodeinfo.items():
11172
          if node not in instance.secondary_nodes:
11173
            continue
11174
          nres.Raise("Can't get info from secondary node %s" % node,
11175
                     prereq=True, ecode=errors.ECODE_STATE)
11176
          if not isinstance(nres.payload.get("memory_free", None), int):
11177
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11178
                                       " memory information" % node,
11179
                                       errors.ECODE_STATE)
11180
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11181
            raise errors.OpPrereqError("This change will prevent the instance"
11182
                                       " from failover to its secondary node"
11183
                                       " %s, due to not enough memory" % node,
11184
                                       errors.ECODE_STATE)
11185

    
11186
    # NIC processing
11187
    self.nic_pnew = {}
11188
    self.nic_pinst = {}
11189
    for nic_op, nic_dict in self.op.nics:
11190
      if nic_op == constants.DDM_REMOVE:
11191
        if not instance.nics:
11192
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11193
                                     errors.ECODE_INVAL)
11194
        continue
11195
      if nic_op != constants.DDM_ADD:
11196
        # an existing nic
11197
        if not instance.nics:
11198
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11199
                                     " no NICs" % nic_op,
11200
                                     errors.ECODE_INVAL)
11201
        if nic_op < 0 or nic_op >= len(instance.nics):
11202
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11203
                                     " are 0 to %d" %
11204
                                     (nic_op, len(instance.nics) - 1),
11205
                                     errors.ECODE_INVAL)
11206
        old_nic_params = instance.nics[nic_op].nicparams
11207
        old_nic_ip = instance.nics[nic_op].ip
11208
      else:
11209
        old_nic_params = {}
11210
        old_nic_ip = None
11211

    
11212
      update_params_dict = dict([(key, nic_dict[key])
11213
                                 for key in constants.NICS_PARAMETERS
11214
                                 if key in nic_dict])
11215

    
11216
      if "bridge" in nic_dict:
11217
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11218

    
11219
      new_nic_params = _GetUpdatedParams(old_nic_params,
11220
                                         update_params_dict)
11221
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11222
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11223
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11224
      self.nic_pinst[nic_op] = new_nic_params
11225
      self.nic_pnew[nic_op] = new_filled_nic_params
11226
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11227

    
11228
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11229
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11230
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11231
        if msg:
11232
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11233
          if self.op.force:
11234
            self.warn.append(msg)
11235
          else:
11236
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11237
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11238
        if constants.INIC_IP in nic_dict:
11239
          nic_ip = nic_dict[constants.INIC_IP]
11240
        else:
11241
          nic_ip = old_nic_ip
11242
        if nic_ip is None:
11243
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11244
                                     " on a routed nic", errors.ECODE_INVAL)
11245
      if constants.INIC_MAC in nic_dict:
11246
        nic_mac = nic_dict[constants.INIC_MAC]
11247
        if nic_mac is None:
11248
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11249
                                     errors.ECODE_INVAL)
11250
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11251
          # otherwise generate the mac
11252
          nic_dict[constants.INIC_MAC] = \
11253
            self.cfg.GenerateMAC(self.proc.GetECId())
11254
        else:
11255
          # or validate/reserve the current one
11256
          try:
11257
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11258
          except errors.ReservationError:
11259
            raise errors.OpPrereqError("MAC address %s already in use"
11260
                                       " in cluster" % nic_mac,
11261
                                       errors.ECODE_NOTUNIQUE)
11262

    
11263
    # DISK processing
11264
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11265
      raise errors.OpPrereqError("Disk operations not supported for"
11266
                                 " diskless instances",
11267
                                 errors.ECODE_INVAL)
11268
    for disk_op, _ in self.op.disks:
11269
      if disk_op == constants.DDM_REMOVE:
11270
        if len(instance.disks) == 1:
11271
          raise errors.OpPrereqError("Cannot remove the last disk of"
11272
                                     " an instance", errors.ECODE_INVAL)
11273
        _CheckInstanceDown(self, instance, "cannot remove disks")
11274

    
11275
      if (disk_op == constants.DDM_ADD and
11276
          len(instance.disks) >= constants.MAX_DISKS):
11277
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11278
                                   " add more" % constants.MAX_DISKS,
11279
                                   errors.ECODE_STATE)
11280
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11281
        # an existing disk
11282
        if disk_op < 0 or disk_op >= len(instance.disks):
11283
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11284
                                     " are 0 to %d" %
11285
                                     (disk_op, len(instance.disks) - 1),
11286
                                     errors.ECODE_INVAL)
11287

    
11288
    return
11289

    
11290
  def _ConvertPlainToDrbd(self, feedback_fn):
11291
    """Converts an instance from plain to drbd.
11292

11293
    """
11294
    feedback_fn("Converting template to drbd")
11295
    instance = self.instance
11296
    pnode = instance.primary_node
11297
    snode = self.op.remote_node
11298

    
11299
    # create a fake disk info for _GenerateDiskTemplate
11300
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11301
                  constants.IDISK_VG: d.logical_id[0]}
11302
                 for d in instance.disks]
11303
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11304
                                      instance.name, pnode, [snode],
11305
                                      disk_info, None, None, 0, feedback_fn)
11306
    info = _GetInstanceInfoText(instance)
11307
    feedback_fn("Creating aditional volumes...")
11308
    # first, create the missing data and meta devices
11309
    for disk in new_disks:
11310
      # unfortunately this is... not too nice
11311
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11312
                            info, True)
11313
      for child in disk.children:
11314
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11315
    # at this stage, all new LVs have been created, we can rename the
11316
    # old ones
11317
    feedback_fn("Renaming original volumes...")
11318
    rename_list = [(o, n.children[0].logical_id)
11319
                   for (o, n) in zip(instance.disks, new_disks)]
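    # Illustrative note (made-up names): each rename_list entry pairs an
    # existing plain disk with the logical id of the data LV (children[0]) of
    # the corresponding new DRBD disk, roughly:
    #
    #   (<plain disk>, ("xenvg", "inst1-disk0_data"))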
11320
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11321
    result.Raise("Failed to rename original LVs")
11322

    
11323
    feedback_fn("Initializing DRBD devices...")
11324
    # all child devices are in place, we can now create the DRBD devices
11325
    for disk in new_disks:
11326
      for node in [pnode, snode]:
11327
        f_create = node == pnode
11328
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11329

    
11330
    # at this point, the instance has been modified
11331
    instance.disk_template = constants.DT_DRBD8
11332
    instance.disks = new_disks
11333
    self.cfg.Update(instance, feedback_fn)
11334

    
11335
    # disks are created, waiting for sync
11336
    disk_abort = not _WaitForSync(self, instance,
11337
                                  oneshot=not self.op.wait_for_sync)
11338
    if disk_abort:
11339
      raise errors.OpExecError("There are some degraded disks for"
11340
                               " this instance, please cleanup manually")
11341

    
11342
  def _ConvertDrbdToPlain(self, feedback_fn):
11343
    """Converts an instance from drbd to plain.
11344

11345
    """
11346
    instance = self.instance
11347
    assert len(instance.secondary_nodes) == 1
11348
    pnode = instance.primary_node
11349
    snode = instance.secondary_nodes[0]
11350
    feedback_fn("Converting template to plain")
11351

    
11352
    old_disks = instance.disks
11353
    new_disks = [d.children[0] for d in old_disks]
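    # Note for clarity: for a DRBD8 disk the children are its two backing
    # LVs -- children[0] is the data volume (kept and turned into the plain
    # disk here), children[1] is the metadata volume (removed further down).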
11354

    
11355
    # copy over size and mode
11356
    for parent, child in zip(old_disks, new_disks):
11357
      child.size = parent.size
11358
      child.mode = parent.mode
11359

    
11360
    # update instance structure
11361
    instance.disks = new_disks
11362
    instance.disk_template = constants.DT_PLAIN
11363
    self.cfg.Update(instance, feedback_fn)
11364

    
11365
    feedback_fn("Removing volumes on the secondary node...")
11366
    for disk in old_disks:
11367
      self.cfg.SetDiskID(disk, snode)
11368
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11369
      if msg:
11370
        self.LogWarning("Could not remove block device %s on node %s,"
11371
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11372

    
11373
    feedback_fn("Removing unneeded volumes on the primary node...")
11374
    for idx, disk in enumerate(old_disks):
11375
      meta = disk.children[1]
11376
      self.cfg.SetDiskID(meta, pnode)
11377
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11378
      if msg:
11379
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11380
                        " continuing anyway: %s", idx, pnode, msg)
11381

    
11382
  def Exec(self, feedback_fn):
11383
    """Modifies an instance.
11384

11385
    All parameters take effect only at the next restart of the instance.
11386

11387
    """
11388
    # Process here the warnings from CheckPrereq, as we don't have a
11389
    # feedback_fn there.
11390
    for warn in self.warn:
11391
      feedback_fn("WARNING: %s" % warn)
11392

    
11393
    result = []
11394
    instance = self.instance
11395
    # disk changes
11396
    for disk_op, disk_dict in self.op.disks:
11397
      if disk_op == constants.DDM_REMOVE:
11398
        # remove the last disk
11399
        device = instance.disks.pop()
11400
        device_idx = len(instance.disks)
11401
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11402
          self.cfg.SetDiskID(disk, node)
11403
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11404
          if msg:
11405
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11406
                            " continuing anyway", device_idx, node, msg)
11407
        result.append(("disk/%d" % device_idx, "remove"))
11408
      elif disk_op == constants.DDM_ADD:
11409
        # add a new disk
11410
        if instance.disk_template in (constants.DT_FILE,
11411
                                        constants.DT_SHARED_FILE):
11412
          file_driver, file_path = instance.disks[0].logical_id
11413
          file_path = os.path.dirname(file_path)
11414
        else:
11415
          file_driver = file_path = None
11416
        disk_idx_base = len(instance.disks)
11417
        new_disk = _GenerateDiskTemplate(self,
11418
                                         instance.disk_template,
11419
                                         instance.name, instance.primary_node,
11420
                                         instance.secondary_nodes,
11421
                                         [disk_dict],
11422
                                         file_path,
11423
                                         file_driver,
11424
                                         disk_idx_base, feedback_fn)[0]
11425
        instance.disks.append(new_disk)
11426
        info = _GetInstanceInfoText(instance)
11427

    
11428
        logging.info("Creating volume %s for instance %s",
11429
                     new_disk.iv_name, instance.name)
11430
        # Note: this needs to be kept in sync with _CreateDisks
11431
        #HARDCODE
11432
        for node in instance.all_nodes:
11433
          f_create = node == instance.primary_node
11434
          try:
11435
            _CreateBlockDev(self, node, instance, new_disk,
11436
                            f_create, info, f_create)
11437
          except errors.OpExecError, err:
11438
            self.LogWarning("Failed to create volume %s (%s) on"
11439
                            " node %s: %s",
11440
                            new_disk.iv_name, new_disk, node, err)
11441
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11442
                       (new_disk.size, new_disk.mode)))
11443
      else:
11444
        # change a given disk
11445
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11446
        result.append(("disk.mode/%d" % disk_op,
11447
                       disk_dict[constants.IDISK_MODE]))
11448

    
11449
    if self.op.disk_template:
11450
      r_shut = _ShutdownInstanceDisks(self, instance)
11451
      if not r_shut:
11452
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11453
                                 " proceed with disk template conversion")
11454
      mode = (instance.disk_template, self.op.disk_template)
11455
      try:
11456
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11457
      except:
11458
        self.cfg.ReleaseDRBDMinors(instance.name)
11459
        raise
11460
      result.append(("disk_template", self.op.disk_template))
11461

    
11462
    # NIC changes
11463
    for nic_op, nic_dict in self.op.nics:
11464
      if nic_op == constants.DDM_REMOVE:
11465
        # remove the last nic
11466
        del instance.nics[-1]
11467
        result.append(("nic.%d" % len(instance.nics), "remove"))
11468
      elif nic_op == constants.DDM_ADD:
11469
        # mac and bridge should be set by now
11470
        mac = nic_dict[constants.INIC_MAC]
11471
        ip = nic_dict.get(constants.INIC_IP, None)
11472
        nicparams = self.nic_pinst[constants.DDM_ADD]
11473
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11474
        instance.nics.append(new_nic)
11475
        result.append(("nic.%d" % (len(instance.nics) - 1),
11476
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11477
                       (new_nic.mac, new_nic.ip,
11478
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11479
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11480
                       )))
11481
      else:
11482
        for key in (constants.INIC_MAC, constants.INIC_IP):
11483
          if key in nic_dict:
11484
            setattr(instance.nics[nic_op], key, nic_dict[key])
11485
        if nic_op in self.nic_pinst:
11486
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11487
        for key, val in nic_dict.iteritems():
11488
          result.append(("nic.%s/%d" % (key, nic_op), val))
11489

    
11490
    # hvparams changes
11491
    if self.op.hvparams:
11492
      instance.hvparams = self.hv_inst
11493
      for key, val in self.op.hvparams.iteritems():
11494
        result.append(("hv/%s" % key, val))
11495

    
11496
    # beparams changes
11497
    if self.op.beparams:
11498
      instance.beparams = self.be_inst
11499
      for key, val in self.op.beparams.iteritems():
11500
        result.append(("be/%s" % key, val))
11501

    
11502
    # OS change
11503
    if self.op.os_name:
11504
      instance.os = self.op.os_name
11505

    
11506
    # osparams changes
11507
    if self.op.osparams:
11508
      instance.osparams = self.os_inst
11509
      for key, val in self.op.osparams.iteritems():
11510
        result.append(("os/%s" % key, val))
11511

    
11512
    self.cfg.Update(instance, feedback_fn)
11513

    
11514
    return result
11515

    
11516
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
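  # Note for clarity: Exec() dispatches through this table via
  # self._DISK_CONVERSIONS[(instance.disk_template, self.op.disk_template)],
  # so only the plain <-> drbd8 conversions listed here are supported.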
11520

    
11521

    
11522
class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
11526

    
11527
  def ExpandNames(self):
11528
    self.share_locks = _ShareAll()
11529
    self.needed_locks = {
11530
      locking.LEVEL_NODEGROUP: [],
11531
      locking.LEVEL_NODE: [],
11532
      }
11533

    
11534
    self._ExpandAndLockInstance()
11535

    
11536
    if self.op.target_groups:
11537
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11538
                                  self.op.target_groups)
11539
    else:
11540
      self.req_target_uuids = None
11541

    
11542
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11543

    
11544
  def DeclareLocks(self, level):
11545
    if level == locking.LEVEL_NODEGROUP:
11546
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11547

    
11548
      if self.req_target_uuids:
11549
        lock_groups = set(self.req_target_uuids)
11550

    
11551
        # Lock all groups used by instance optimistically; this requires going
11552
        # via the node before it's locked, requiring verification later on
11553
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11554
        lock_groups.update(instance_groups)
11555
      else:
11556
        # No target groups, need to lock all of them
11557
        lock_groups = locking.ALL_SET
11558

    
11559
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11560

    
11561
    elif level == locking.LEVEL_NODE:
11562
      if self.req_target_uuids:
11563
        # Lock all nodes used by instances
11564
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11565
        self._LockInstancesNodes()
11566

    
11567
        # Lock all nodes in all potential target groups
11568
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11569
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11570
        member_nodes = [node_name
11571
                        for group in lock_groups
11572
                        for node_name in self.cfg.GetNodeGroup(group).members]
11573
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11574
      else:
11575
        # Lock all nodes as all groups are potential targets
11576
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11577

    
11578
  def CheckPrereq(self):
11579
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11580
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11581
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11582

    
11583
    assert (self.req_target_uuids is None or
11584
            owned_groups.issuperset(self.req_target_uuids))
11585
    assert owned_instances == set([self.op.instance_name])
11586

    
11587
    # Get instance information
11588
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11589

    
11590
    # Check if node groups for locked instance are still correct
11591
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11592
      ("Instance %s's nodes changed while we kept the lock" %
11593
       self.op.instance_name)
11594

    
11595
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11596
                                           owned_groups)
11597

    
11598
    if self.req_target_uuids:
11599
      # User requested specific target groups
11600
      self.target_uuids = self.req_target_uuids
11601
    else:
11602
      # All groups except those used by the instance are potential targets
11603
      self.target_uuids = owned_groups - inst_groups
11604

    
11605
    conflicting_groups = self.target_uuids & inst_groups
11606
    if conflicting_groups:
11607
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11608
                                 " used by the instance '%s'" %
11609
                                 (utils.CommaJoin(conflicting_groups),
11610
                                  self.op.instance_name),
11611
                                 errors.ECODE_INVAL)
11612

    
11613
    if not self.target_uuids:
11614
      raise errors.OpPrereqError("There are no possible target groups",
11615
                                 errors.ECODE_INVAL)
11616

    
11617
  def BuildHooksEnv(self):
11618
    """Build hooks env.
11619

11620
    """
11621
    assert self.target_uuids
11622

    
11623
    env = {
11624
      "TARGET_GROUPS": " ".join(self.target_uuids),
11625
      }
11626

    
11627
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11628

    
11629
    return env
11630

    
11631
  def BuildHooksNodes(self):
11632
    """Build hooks nodes.
11633

11634
    """
11635
    mn = self.cfg.GetMasterNode()
11636
    return ([mn], [mn])
11637

    
11638
  def Exec(self, feedback_fn):
11639
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11640

    
11641
    assert instances == [self.op.instance_name], "Instance not locked"
11642

    
11643
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11644
                     instances=instances, target_groups=list(self.target_uuids))
11645

    
11646
    ial.Run(self.op.iallocator)
11647

    
11648
    if not ial.success:
11649
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11650
                                 " instance '%s' using iallocator '%s': %s" %
11651
                                 (self.op.instance_name, self.op.iallocator,
11652
                                  ial.info),
11653
                                 errors.ECODE_NORES)
11654

    
11655
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11656

    
11657
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11658
                 " instance '%s'", len(jobs), self.op.instance_name)
11659

    
11660
    return ResultWithJobs(jobs)
11661

    
11662

    
11663
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result
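    # Illustrative example (made-up names) of the returned structure; False
    # means the export list could not be fetched from that node:
    #
    #   {
    #     "node1.example.com": ["inst1.example.com", "inst2.example.com"],
    #     "node2.example.com": False,
    #   }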
11697

    
11698

    
11699
class LUBackupPrepare(NoHooksLU):
11700
  """Prepares an instance for an export and returns useful information.
11701

11702
  """
11703
  REQ_BGL = False
11704

    
11705
  def ExpandNames(self):
11706
    self._ExpandAndLockInstance()
11707

    
11708
  def CheckPrereq(self):
11709
    """Check prerequisites.
11710

11711
    """
11712
    instance_name = self.op.instance_name
11713

    
11714
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11715
    assert self.instance is not None, \
11716
          "Cannot retrieve locked instance %s" % self.op.instance_name
11717
    _CheckNodeOnline(self, self.instance.primary_node)
11718

    
11719
    self._cds = _GetClusterDomainSecret()
11720

    
11721
  def Exec(self, feedback_fn):
11722
    """Prepares an instance for an export.
11723

11724
    """
11725
    instance = self.instance
11726

    
11727
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11728
      salt = utils.GenerateSecret(8)
11729

    
11730
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11731
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11732
                                              constants.RIE_CERT_VALIDITY)
11733
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11734

    
11735
      (name, cert_pem) = result.payload
11736

    
11737
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11738
                                             cert_pem)
11739

    
11740
      return {
11741
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11742
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11743
                          salt),
11744
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11745
        }
11746

    
11747
    return None
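    # Note for clarity: only remote exports need preparation data, hence the
    # None for the other modes. The "x509_key_name" entry returned above is a
    # (name, hmac, salt) triple that LUBackupExport later verifies against
    # the cluster domain secret.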
11748

    
11749

    
11750
class LUBackupExport(LogicalUnit):
11751
  """Export an instance to an image in the cluster.
11752

11753
  """
11754
  HPATH = "instance-export"
11755
  HTYPE = constants.HTYPE_INSTANCE
11756
  REQ_BGL = False
11757

    
11758
  def CheckArguments(self):
11759
    """Check the arguments.
11760

11761
    """
11762
    self.x509_key_name = self.op.x509_key_name
11763
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11764

    
11765
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11766
      if not self.x509_key_name:
11767
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11768
                                   errors.ECODE_INVAL)
11769

    
11770
      if not self.dest_x509_ca_pem:
11771
        raise errors.OpPrereqError("Missing destination X509 CA",
11772
                                   errors.ECODE_INVAL)
11773

    
11774
  def ExpandNames(self):
11775
    self._ExpandAndLockInstance()
11776

    
11777
    # Lock all nodes for local exports
11778
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11779
      # FIXME: lock only instance primary and destination node
11780
      #
11781
      # Sad but true, for now we have to lock all nodes, as we don't know where
11782
      # the previous export might be, and in this LU we search for it and
11783
      # remove it from its current node. In the future we could fix this by:
11784
      #  - making a tasklet to search (share-lock all), then create the
11785
      #    new one, then one to remove, after
11786
      #  - removing the removal operation altogether
11787
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11788

    
11789
  def DeclareLocks(self, level):
11790
    """Last minute lock declaration."""
11791
    # All nodes are locked anyway, so nothing to do here.
11792

    
11793
  def BuildHooksEnv(self):
11794
    """Build hooks env.
11795

11796
    This will run on the master, primary node and target node.
11797

11798
    """
11799
    env = {
11800
      "EXPORT_MODE": self.op.mode,
11801
      "EXPORT_NODE": self.op.target_node,
11802
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11803
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11804
      # TODO: Generic function for boolean env variables
11805
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11806
      }
11807

    
11808
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11809

    
11810
    return env
11811

    
11812
  def BuildHooksNodes(self):
11813
    """Build hooks nodes.
11814

11815
    """
11816
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11817

    
11818
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11819
      nl.append(self.op.target_node)
11820

    
11821
    return (nl, nl)
11822

    
11823
  def CheckPrereq(self):
11824
    """Check prerequisites.
11825

11826
    This checks that the instance and node names are valid.
11827

11828
    """
11829
    instance_name = self.op.instance_name
11830

    
11831
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11832
    assert self.instance is not None, \
11833
          "Cannot retrieve locked instance %s" % self.op.instance_name
11834
    _CheckNodeOnline(self, self.instance.primary_node)
11835

    
11836
    if (self.op.remove_instance and self.instance.admin_up and
11837
        not self.op.shutdown):
11838
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11839
                                 " down before")
11840

    
11841
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11842
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11843
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11844
      assert self.dst_node is not None
11845

    
11846
      _CheckNodeOnline(self, self.dst_node.name)
11847
      _CheckNodeNotDrained(self, self.dst_node.name)
11848

    
11849
      self._cds = None
11850
      self.dest_disk_info = None
11851
      self.dest_x509_ca = None
11852

    
11853
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11854
      self.dst_node = None
11855

    
11856
      if len(self.op.target_node) != len(self.instance.disks):
11857
        raise errors.OpPrereqError(("Received destination information for %s"
11858
                                    " disks, but instance %s has %s disks") %
11859
                                   (len(self.op.target_node), instance_name,
11860
                                    len(self.instance.disks)),
11861
                                   errors.ECODE_INVAL)
11862

    
11863
      cds = _GetClusterDomainSecret()
11864

    
11865
      # Check X509 key name
11866
      try:
11867
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11868
      except (TypeError, ValueError), err:
11869
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11870

    
11871
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11872
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11873
                                   errors.ECODE_INVAL)
11874

    
11875
      # Load and verify CA
11876
      try:
11877
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11878
      except OpenSSL.crypto.Error, err:
11879
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11880
                                   (err, ), errors.ECODE_INVAL)
11881

    
11882
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11883
      if errcode is not None:
11884
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11885
                                   (msg, ), errors.ECODE_INVAL)
11886

    
11887
      self.dest_x509_ca = cert
11888

    
11889
      # Verify target information
11890
      disk_info = []
11891
      for idx, disk_data in enumerate(self.op.target_node):
11892
        try:
11893
          (host, port, magic) = \
11894
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11895
        except errors.GenericError, err:
11896
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11897
                                     (idx, err), errors.ECODE_INVAL)
11898

    
11899
        disk_info.append((host, port, magic))
11900

    
11901
      assert len(disk_info) == len(self.op.target_node)
11902
      self.dest_disk_info = disk_info
11903

    
11904
    else:
11905
      raise errors.ProgrammerError("Unhandled export mode %r" %
11906
                                   self.op.mode)
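    # Illustrative sketch (made-up values): in remote mode self.op.target_node
    # carries one opaque per-disk entry, and after verification
    # self.dest_disk_info is a list of (host, port, magic) tuples, e.g.
    #
    #   [("203.0.113.5", 11000, "magic0"), ("203.0.113.5", 11001, "magic1")]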
11907

    
11908
    # instance disk type verification
11909
    # TODO: Implement export support for file-based disks
11910
    for disk in self.instance.disks:
11911
      if disk.dev_type == constants.LD_FILE:
11912
        raise errors.OpPrereqError("Export not supported for instances with"
11913
                                   " file-based disks", errors.ECODE_INVAL)
11914

    
11915
  def _CleanupExports(self, feedback_fn):
11916
    """Removes exports of current instance from all other nodes.
11917

11918
    If an instance in a cluster with nodes A..D was exported to node C, its
11919
    exports will be removed from the nodes A, B and D.
11920

11921
    """
11922
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11923

    
11924
    nodelist = self.cfg.GetNodeList()
11925
    nodelist.remove(self.dst_node.name)
11926

    
11927
    # on one-node clusters nodelist will be empty after the removal
11928
    # if we proceed the backup would be removed because OpBackupQuery
11929
    # substitutes an empty list with the full cluster node list.
11930
    iname = self.instance.name
11931
    if nodelist:
11932
      feedback_fn("Removing old exports for instance %s" % iname)
11933
      exportlist = self.rpc.call_export_list(nodelist)
11934
      for node in exportlist:
11935
        if exportlist[node].fail_msg:
11936
          continue
11937
        if iname in exportlist[node].payload:
11938
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11939
          if msg:
11940
            self.LogWarning("Could not remove older export for instance %s"
11941
                            " on node %s: %s", iname, node, msg)
11942

    
11943
  def Exec(self, feedback_fn):
11944
    """Export an instance to an image in the cluster.
11945

11946
    """
11947
    assert self.op.mode in constants.EXPORT_MODES
11948

    
11949
    instance = self.instance
11950
    src_node = instance.primary_node
11951

    
11952
    if self.op.shutdown:
11953
      # shutdown the instance, but not the disks
11954
      feedback_fn("Shutting down instance %s" % instance.name)
11955
      result = self.rpc.call_instance_shutdown(src_node, instance,
11956
                                               self.op.shutdown_timeout)
11957
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11958
      result.Raise("Could not shutdown instance %s on"
11959
                   " node %s" % (instance.name, src_node))
11960

    
11961
    # set the disks ID correctly since call_instance_start needs the
11962
    # correct drbd minor to create the symlinks
11963
    for disk in instance.disks:
11964
      self.cfg.SetDiskID(disk, src_node)
11965

    
11966
    activate_disks = (not instance.admin_up)
11967

    
11968
    if activate_disks:
11969
      # Activate the instance disks if we're exporting a stopped instance
11970
      feedback_fn("Activating disks for %s" % instance.name)
11971
      _StartInstanceDisks(self, instance, None)
11972

    
11973
    try:
11974
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11975
                                                     instance)
11976

    
11977
      helper.CreateSnapshots()
11978
      try:
11979
        if (self.op.shutdown and instance.admin_up and
11980
            not self.op.remove_instance):
11981
          assert not activate_disks
11982
          feedback_fn("Starting instance %s" % instance.name)
11983
          result = self.rpc.call_instance_start(src_node,
11984
                                                (instance, None, None), False)
11985
          msg = result.fail_msg
11986
          if msg:
11987
            feedback_fn("Failed to start instance: %s" % msg)
11988
            _ShutdownInstanceDisks(self, instance)
11989
            raise errors.OpExecError("Could not start instance: %s" % msg)
11990

    
11991
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11992
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11993
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11994
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11995
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11996

    
11997
          (key_name, _, _) = self.x509_key_name
11998

    
11999
          dest_ca_pem = \
12000
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12001
                                            self.dest_x509_ca)
12002

    
12003
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12004
                                                     key_name, dest_ca_pem,
12005
                                                     timeouts)
12006
      finally:
12007
        helper.Cleanup()
12008

    
12009
      # Check for backwards compatibility
12010
      assert len(dresults) == len(instance.disks)
12011
      assert compat.all(isinstance(i, bool) for i in dresults), \
12012
             "Not all results are boolean: %r" % dresults
12013

    
12014
    finally:
12015
      if activate_disks:
12016
        feedback_fn("Deactivating disks for %s" % instance.name)
12017
        _ShutdownInstanceDisks(self, instance)
12018

    
12019
    if not (compat.all(dresults) and fin_resu):
12020
      failures = []
12021
      if not fin_resu:
12022
        failures.append("export finalization")
12023
      if not compat.all(dresults):
12024
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12025
                               if not dsk)
12026
        failures.append("disk export: disk(s) %s" % fdsk)
12027

    
12028
      raise errors.OpExecError("Export failed, errors in %s" %
12029
                               utils.CommaJoin(failures))
12030

    
12031
    # At this point, the export was successful, we can cleanup/finish
12032

    
12033
    # Remove instance if requested
12034
    if self.op.remove_instance:
12035
      feedback_fn("Removing instance %s" % instance.name)
12036
      _RemoveInstance(self, feedback_fn, instance,
12037
                      self.op.ignore_remove_failures)
12038

    
12039
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12040
      self._CleanupExports(feedback_fn)
12041

    
12042
    return fin_resu, dresults
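    # Note for clarity: fin_resu reports whether export finalization
    # succeeded and dresults holds one boolean per instance disk (see the
    # backwards-compatibility assertions above), e.g. (True, [True, True])
    # for a fully successful two-disk export.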
12043

    
12044

    
12045
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False
12050

    
12051
  def ExpandNames(self):
12052
    self.needed_locks = {}
12053
    # We need all nodes to be locked in order for RemoveExport to work, but we
12054
    # don't need to lock the instance itself, as nothing will happen to it (and
12055
    # we can remove exports also for a removed instance)
12056
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12057

    
12058
  def Exec(self, feedback_fn):
12059
    """Remove any export.
12060

12061
    """
12062
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12063
    # If the instance was not found we'll try with the name that was passed in.
12064
    # This will only work if it was an FQDN, though.
12065
    fqdn_warn = False
12066
    if not instance_name:
12067
      fqdn_warn = True
12068
      instance_name = self.op.instance_name
12069

    
12070
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12071
    exportlist = self.rpc.call_export_list(locked_nodes)
12072
    found = False
12073
    for node in exportlist:
12074
      msg = exportlist[node].fail_msg
12075
      if msg:
12076
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12077
        continue
12078
      if instance_name in exportlist[node].payload:
12079
        found = True
12080
        result = self.rpc.call_export_remove(node, instance_name)
12081
        msg = result.fail_msg
12082
        if msg:
12083
          logging.error("Could not remove export for instance %s"
12084
                        " on node %s: %s", instance_name, node, msg)
12085

    
12086
    if fqdn_warn and not found:
12087
      feedback_fn("Export not found. If trying to remove an export belonging"
12088
                  " to a deleted instance please use its Fully Qualified"
12089
                  " Domain Name.")
12090

    
12091

    
12092
class LUGroupAdd(LogicalUnit):
12093
  """Logical unit for creating node groups.
12094

12095
  """
12096
  HPATH = "group-add"
12097
  HTYPE = constants.HTYPE_GROUP
12098
  REQ_BGL = False
12099

    
12100
  def ExpandNames(self):
12101
    # We need the new group's UUID here so that we can create and acquire the
12102
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12103
    # that it should not check whether the UUID exists in the configuration.
12104
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12105
    self.needed_locks = {}
12106
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12107

    
12108
  def CheckPrereq(self):
12109
    """Check prerequisites.
12110

12111
    This checks that the given group name is not an existing node group
12112
    already.
12113

12114
    """
12115
    try:
12116
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12117
    except errors.OpPrereqError:
12118
      pass
12119
    else:
12120
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12121
                                 " node group (UUID: %s)" %
12122
                                 (self.op.group_name, existing_uuid),
12123
                                 errors.ECODE_EXISTS)
12124

    
12125
    if self.op.ndparams:
12126
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12127

    
12128
  def BuildHooksEnv(self):
12129
    """Build hooks env.
12130

12131
    """
12132
    return {
12133
      "GROUP_NAME": self.op.group_name,
12134
      }
12135

    
12136
  def BuildHooksNodes(self):
12137
    """Build hooks nodes.
12138

12139
    """
12140
    mn = self.cfg.GetMasterNode()
12141
    return ([mn], [mn])
12142

    
12143
  def Exec(self, feedback_fn):
12144
    """Add the node group to the cluster.
12145

12146
    """
12147
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12148
                                  uuid=self.group_uuid,
12149
                                  alloc_policy=self.op.alloc_policy,
12150
                                  ndparams=self.op.ndparams)
12151

    
12152
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12153
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12154

    
12155

    
12156
class LUGroupAssignNodes(NoHooksLU):
12157
  """Logical unit for assigning nodes to groups.
12158

12159
  """
12160
  REQ_BGL = False
12161

    
12162
  def ExpandNames(self):
12163
    # These raise errors.OpPrereqError on their own:
12164
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12165
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12166

    
12167
    # We want to lock all the affected nodes and groups. We have readily
12168
    # available the list of nodes, and the *destination* group. To gather the
12169
    # list of "source" groups, we need to fetch node information later on.
12170
    self.needed_locks = {
12171
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12172
      locking.LEVEL_NODE: self.op.nodes,
12173
      }
12174

    
12175
  def DeclareLocks(self, level):
12176
    if level == locking.LEVEL_NODEGROUP:
12177
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12178

    
12179
      # Try to get all affected nodes' groups without having the group or node
12180
      # lock yet. Needs verification later in the code flow.
12181
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12182

    
12183
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12184

    
12185
  def CheckPrereq(self):
12186
    """Check prerequisites.
12187

12188
    """
12189
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12190
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12191
            frozenset(self.op.nodes))
12192

    
12193
    expected_locks = (set([self.group_uuid]) |
12194
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12195
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12196
    if actual_locks != expected_locks:
12197
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12198
                               " current groups are '%s', used to be '%s'" %
12199
                               (utils.CommaJoin(expected_locks),
12200
                                utils.CommaJoin(actual_locks)))
12201

    
12202
    self.node_data = self.cfg.GetAllNodesInfo()
12203
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12204
    instance_data = self.cfg.GetAllInstancesInfo()
12205

    
12206
    if self.group is None:
12207
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12208
                               (self.op.group_name, self.group_uuid))
12209

    
12210
    (new_splits, previous_splits) = \
12211
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12212
                                             for node in self.op.nodes],
12213
                                            self.node_data, instance_data)
12214

    
12215
    if new_splits:
12216
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12217

    
12218
      if not self.op.force:
12219
        raise errors.OpExecError("The following instances get split by this"
12220
                                 " change and --force was not given: %s" %
12221
                                 fmt_new_splits)
12222
      else:
12223
        self.LogWarning("This operation will split the following instances: %s",
12224
                        fmt_new_splits)
12225

    
12226
        if previous_splits:
12227
          self.LogWarning("In addition, these already-split instances continue"
12228
                          " to be split across groups: %s",
12229
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12230

    
12231
  def Exec(self, feedback_fn):
12232
    """Assign nodes to a new group.
12233

12234
    """
12235
    for node in self.op.nodes:
12236
      self.node_data[node].group = self.group_uuid
12237

    
12238
    # FIXME: Depends on side-effects of modifying the result of
12239
    # C{cfg.GetAllNodesInfo}
12240

    
12241
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12242

    
12243
  @staticmethod
12244
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12245
    """Check for split instances after a node assignment.
12246

12247
    This method considers a series of node assignments as an atomic operation,
12248
    and returns information about split instances after applying the set of
12249
    changes.
12250

12251
    In particular, it returns information about newly split instances, and
12252
    instances that were already split, and remain so after the change.
12253

12254
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12255
    considered.
12256

12257
    @type changes: list of (node_name, new_group_uuid) pairs.
12258
    @param changes: list of node assignments to consider.
12259
    @param node_data: a dict with data for all nodes
12260
    @param instance_data: a dict with all instances to consider
12261
    @rtype: a two-tuple
    @return: a list of instances that were previously healthy and become split
      as a consequence of this change, and a list of instances that were
      already split before the change and are not fixed by it.
12265

12266
    """
12267
    changed_nodes = dict((node, group) for node, group in changes
12268
                         if node_data[node].group != group)
12269

    
12270
    all_split_instances = set()
12271
    previously_split_instances = set()
12272

    
12273
    def InstanceNodes(instance):
12274
      return [instance.primary_node] + list(instance.secondary_nodes)
12275

    
12276
    for inst in instance_data.values():
12277
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12278
        continue
12279

    
12280
      instance_nodes = InstanceNodes(inst)
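      # An instance is "split" when its primary and secondary nodes span more
      # than one node group: record the current state first, then the state
      # after the pending assignments are applied.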
12281

    
12282
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12283
        previously_split_instances.add(inst.name)
12284

    
12285
      if len(set(changed_nodes.get(node, node_data[node].group)
12286
                 for node in instance_nodes)) > 1:
12287
        all_split_instances.add(inst.name)
12288

    
12289
    return (list(all_split_instances - previously_split_instances),
12290
            list(previously_split_instances & all_split_instances))
12291

    
12292

    
12293
class _GroupQuery(_QueryBase):
12294
  FIELDS = query.GROUP_FIELDS
12295

    
12296
  def ExpandNames(self, lu):
12297
    lu.needed_locks = {}
12298

    
12299
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12300
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12301

    
12302
    if not self.names:
12303
      self.wanted = [name_to_uuid[name]
12304
                     for name in utils.NiceSort(name_to_uuid.keys())]
12305
    else:
12306
      # Accept names to be either names or UUIDs.
12307
      missing = []
12308
      self.wanted = []
12309
      all_uuid = frozenset(self._all_groups.keys())
12310

    
12311
      for name in self.names:
12312
        if name in all_uuid:
12313
          self.wanted.append(name)
12314
        elif name in name_to_uuid:
12315
          self.wanted.append(name_to_uuid[name])
12316
        else:
12317
          missing.append(name)
12318

    
12319
      if missing:
12320
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12321
                                   utils.CommaJoin(missing),
12322
                                   errors.ECODE_NOENT)
12323

    
12324
  def DeclareLocks(self, lu, level):
12325
    pass
12326

    
12327
  def _GetQueryData(self, lu):
12328
    """Computes the list of node groups and their attributes.
12329

12330
    """
12331
    do_nodes = query.GQ_NODE in self.requested_data
12332
    do_instances = query.GQ_INST in self.requested_data
12333

    
12334
    group_to_nodes = None
12335
    group_to_instances = None
12336

    
12337
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12338
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12339
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12340
    # instance->node. Hence, we will need to process nodes even if we only need
12341
    # instance information.
12342
    if do_nodes or do_instances:
12343
      all_nodes = lu.cfg.GetAllNodesInfo()
12344
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12345
      node_to_group = {}
12346

    
12347
      for node in all_nodes.values():
12348
        if node.group in group_to_nodes:
12349
          group_to_nodes[node.group].append(node.name)
12350
          node_to_group[node.name] = node.group
12351

    
12352
      if do_instances:
12353
        all_instances = lu.cfg.GetAllInstancesInfo()
12354
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
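        # Instances are attributed to a group via their primary node only;
        # secondary nodes are not considered here.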
12355

    
12356
        for instance in all_instances.values():
12357
          node = instance.primary_node
12358
          if node in node_to_group:
12359
            group_to_instances[node_to_group[node]].append(instance.name)
12360

    
12361
        if not do_nodes:
12362
          # Do not pass on node information if it was not requested.
12363
          group_to_nodes = None
12364

    
12365
    return query.GroupQueryData([self._all_groups[uuid]
12366
                                 for uuid in self.wanted],
12367
                                group_to_nodes, group_to_instances)


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
12391
  """Modifies the parameters of a node group.
12392

12393
  """
12394
  HPATH = "group-modify"
12395
  HTYPE = constants.HTYPE_GROUP
12396
  REQ_BGL = False
12397

    
12398
  def CheckArguments(self):
12399
    all_changes = [
12400
      self.op.ndparams,
12401
      self.op.alloc_policy,
12402
      ]
12403

    
12404
    if all_changes.count(None) == len(all_changes):
12405
      raise errors.OpPrereqError("Please pass at least one modification",
12406
                                 errors.ECODE_INVAL)
12407

    
12408
  def ExpandNames(self):
12409
    # This raises errors.OpPrereqError on its own:
12410
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12411

    
12412
    self.needed_locks = {
12413
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12414
      }
12415

    
12416
  def CheckPrereq(self):
12417
    """Check prerequisites.
12418

12419
    """
12420
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12421

    
12422
    if self.group is None:
12423
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12424
                               (self.op.group_name, self.group_uuid))
12425

    
12426
    if self.op.ndparams:
12427
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12428
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12429
      self.new_ndparams = new_ndparams
12430

    
12431
  def BuildHooksEnv(self):
12432
    """Build hooks env.
12433

12434
    """
12435
    return {
12436
      "GROUP_NAME": self.op.group_name,
12437
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12438
      }
12439

    
12440
  def BuildHooksNodes(self):
12441
    """Build hooks nodes.
12442

12443
    """
12444
    mn = self.cfg.GetMasterNode()
12445
    return ([mn], [mn])
12446

    
12447
  def Exec(self, feedback_fn):
12448
    """Modifies the node group.
12449

12450
    """
12451
    result = []
12452

    
12453
    if self.op.ndparams:
12454
      self.group.ndparams = self.new_ndparams
12455
      result.append(("ndparams", str(self.group.ndparams)))
12456

    
12457
    if self.op.alloc_policy:
12458
      self.group.alloc_policy = self.op.alloc_policy
12459

    
12460
    self.cfg.Update(self.group, feedback_fn)
12461
    return result
12462

    
12463

    
12464
class LUGroupRemove(LogicalUnit):
12465
  HPATH = "group-remove"
12466
  HTYPE = constants.HTYPE_GROUP
12467
  REQ_BGL = False
12468

    
12469
  def ExpandNames(self):
12470
    # This will raise errors.OpPrereqError on its own:
12471
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12472
    self.needed_locks = {
12473
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12474
      }
12475

    
12476
  def CheckPrereq(self):
12477
    """Check prerequisites.
12478

12479
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
12482

12483
    """
12484
    # Verify that the group is empty.
12485
    group_nodes = [node.name
12486
                   for node in self.cfg.GetAllNodesInfo().values()
12487
                   if node.group == self.group_uuid]
12488

    
12489
    if group_nodes:
12490
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12491
                                 " nodes: %s" %
12492
                                 (self.op.group_name,
12493
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12494
                                 errors.ECODE_STATE)
12495

    
12496
    # Verify the cluster would not be left group-less.
12497
    if len(self.cfg.GetNodeGroupList()) == 1:
12498
      raise errors.OpPrereqError("Group '%s' is the only group,"
12499
                                 " cannot be removed" %
12500
                                 self.op.group_name,
12501
                                 errors.ECODE_STATE)
12502

    
12503
  def BuildHooksEnv(self):
12504
    """Build hooks env.
12505

12506
    """
12507
    return {
12508
      "GROUP_NAME": self.op.group_name,
12509
      }
12510

    
12511
  def BuildHooksNodes(self):
12512
    """Build hooks nodes.
12513

12514
    """
12515
    mn = self.cfg.GetMasterNode()
12516
    return ([mn], [mn])
12517

    
12518
  def Exec(self, feedback_fn):
12519
    """Remove the node group.
12520

12521
    """
12522
    try:
12523
      self.cfg.RemoveNodeGroup(self.group_uuid)
12524
    except errors.ConfigurationError:
12525
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12526
                               (self.op.group_name, self.group_uuid))
12527

    
12528
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12529

    
12530

    
12531
class LUGroupRename(LogicalUnit):
12532
  HPATH = "group-rename"
12533
  HTYPE = constants.HTYPE_GROUP
12534
  REQ_BGL = False
12535

    
12536
  def ExpandNames(self):
12537
    # This raises errors.OpPrereqError on its own:
12538
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12539

    
12540
    self.needed_locks = {
12541
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12542
      }
12543

    
12544
  def CheckPrereq(self):
12545
    """Check prerequisites.
12546

12547
    Ensures requested new name is not yet used.
12548

12549
    """
12550
    try:
12551
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12552
    except errors.OpPrereqError:
12553
      pass
12554
    else:
12555
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12556
                                 " node group (UUID: %s)" %
12557
                                 (self.op.new_name, new_name_uuid),
12558
                                 errors.ECODE_EXISTS)
12559

    
12560
  def BuildHooksEnv(self):
12561
    """Build hooks env.
12562

12563
    """
12564
    return {
12565
      "OLD_NAME": self.op.group_name,
12566
      "NEW_NAME": self.op.new_name,
12567
      }
12568

    
12569
  def BuildHooksNodes(self):
12570
    """Build hooks nodes.
12571

12572
    """
12573
    mn = self.cfg.GetMasterNode()
12574

    
12575
    all_nodes = self.cfg.GetAllNodesInfo()
12576
    all_nodes.pop(mn, None)
12577

    
12578
    run_nodes = [mn]
12579
    run_nodes.extend(node.name for node in all_nodes.values()
12580
                     if node.group == self.group_uuid)
12581

    
12582
    return (run_nodes, run_nodes)
12583

    
12584
  def Exec(self, feedback_fn):
12585
    """Rename the node group.
12586

12587
    """
12588
    group = self.cfg.GetNodeGroup(self.group_uuid)
12589

    
12590
    if group is None:
12591
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12592
                               (self.op.group_name, self.group_uuid))
12593

    
12594
    group.name = self.op.new_name
12595
    self.cfg.Update(group, feedback_fn)
12596

    
12597
    return self.op.new_name
12598

    
12599

    
12600
class LUGroupEvacuate(LogicalUnit):
12601
  HPATH = "group-evacuate"
12602
  HTYPE = constants.HTYPE_GROUP
12603
  REQ_BGL = False
12604

    
12605
  def ExpandNames(self):
12606
    # This raises errors.OpPrereqError on its own:
12607
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12608

    
12609
    if self.op.target_groups:
12610
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12611
                                  self.op.target_groups)
12612
    else:
12613
      self.req_target_uuids = []
12614

    
12615
    if self.group_uuid in self.req_target_uuids:
12616
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12617
                                 " as a target group (targets are %s)" %
12618
                                 (self.group_uuid,
12619
                                  utils.CommaJoin(self.req_target_uuids)),
12620
                                 errors.ECODE_INVAL)
12621

    
12622
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12623

    
12624
    self.share_locks = _ShareAll()
12625
    self.needed_locks = {
12626
      locking.LEVEL_INSTANCE: [],
12627
      locking.LEVEL_NODEGROUP: [],
12628
      locking.LEVEL_NODE: [],
12629
      }
12630

    
12631
  def DeclareLocks(self, level):
12632
    if level == locking.LEVEL_INSTANCE:
12633
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12634

    
12635
      # Lock instances optimistically, needs verification once node and group
12636
      # locks have been acquired
12637
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12638
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12639

    
12640
    elif level == locking.LEVEL_NODEGROUP:
12641
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12642

    
12643
      if self.req_target_uuids:
12644
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12645

    
12646
        # Lock all groups used by instances optimistically; this requires going
12647
        # via the node before it's locked, requiring verification later on
12648
        lock_groups.update(group_uuid
12649
                           for instance_name in
12650
                             self.owned_locks(locking.LEVEL_INSTANCE)
12651
                           for group_uuid in
12652
                             self.cfg.GetInstanceNodeGroups(instance_name))
12653
      else:
12654
        # No target groups, need to lock all of them
12655
        lock_groups = locking.ALL_SET
12656

    
12657
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12658

    
12659
    elif level == locking.LEVEL_NODE:
12660
      # This will only lock the nodes in the group to be evacuated which
12661
      # contain actual instances
12662
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12663
      self._LockInstancesNodes()
12664

    
12665
      # Lock all nodes in group to be evacuated and target groups
12666
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12667
      assert self.group_uuid in owned_groups
12668
      member_nodes = [node_name
12669
                      for group in owned_groups
12670
                      for node_name in self.cfg.GetNodeGroup(group).members]
12671
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12672

    
12673
  def CheckPrereq(self):
12674
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12675
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12676
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12677

    
12678
    assert owned_groups.issuperset(self.req_target_uuids)
12679
    assert self.group_uuid in owned_groups
12680

    
12681
    # Check if locked instances are still correct
12682
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12683

    
12684
    # Get instance information
12685
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12686

    
12687
    # Check if node groups for locked instances are still correct
12688
    for instance_name in owned_instances:
12689
      inst = self.instances[instance_name]
12690
      assert owned_nodes.issuperset(inst.all_nodes), \
12691
        "Instance %s's nodes changed while we kept the lock" % instance_name
12692

    
12693
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12694
                                             owned_groups)
12695

    
12696
      assert self.group_uuid in inst_groups, \
12697
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12698

    
12699
    if self.req_target_uuids:
12700
      # User requested specific target groups
12701
      self.target_uuids = self.req_target_uuids
12702
    else:
12703
      # All groups except the one to be evacuated are potential targets
12704
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12705
                           if group_uuid != self.group_uuid]
12706

    
12707
      if not self.target_uuids:
12708
        raise errors.OpPrereqError("There are no possible target groups",
12709
                                   errors.ECODE_INVAL)
12710

    
12711
  def BuildHooksEnv(self):
12712
    """Build hooks env.
12713

12714
    """
12715
    return {
12716
      "GROUP_NAME": self.op.group_name,
12717
      "TARGET_GROUPS": " ".join(self.target_uuids),
12718
      }
12719

    
12720
  def BuildHooksNodes(self):
12721
    """Build hooks nodes.
12722

12723
    """
12724
    mn = self.cfg.GetMasterNode()
12725

    
12726
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12727

    
12728
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12729

    
12730
    return (run_nodes, run_nodes)
12731

    
12732
  def Exec(self, feedback_fn):
12733
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12734

    
12735
    assert self.group_uuid not in self.target_uuids
12736

    
12737
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12738
                     instances=instances, target_groups=self.target_uuids)
12739

    
12740
    ial.Run(self.op.iallocator)
12741

    
12742
    if not ial.success:
12743
      raise errors.OpPrereqError("Can't compute group evacuation using"
12744
                                 " iallocator '%s': %s" %
12745
                                 (self.op.iallocator, ial.info),
12746
                                 errors.ECODE_NORES)
12747

    
12748
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12749

    
12750
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12751
                 len(jobs), self.op.group_name)
12752

    
12753
    return ResultWithJobs(jobs)


class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
12814
  """Searches the tags for a given pattern.
12815

12816
  """
12817
  REQ_BGL = False
12818

    
12819
  def ExpandNames(self):
12820
    self.needed_locks = {}
12821

    
12822
  def CheckPrereq(self):
12823
    """Check prerequisites.
12824

12825
    This checks the pattern passed for validity by compiling it.
12826

12827
    """
12828
    try:
12829
      self.re = re.compile(self.op.pattern)
12830
    except re.error, err:
12831
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12832
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12833

    
12834
  def Exec(self, feedback_fn):
12835
    """Returns the tag list.
12836

12837
    """
12838
    cfg = self.cfg
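    # Collect every taggable object in the cluster (the cluster itself, all
    # instances, nodes and node groups), each paired with a pseudo-path used
    # to identify it in the results.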
12839
    tgts = [("/cluster", cfg.GetClusterInfo())]
12840
    ilist = cfg.GetAllInstancesInfo().values()
12841
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12842
    nlist = cfg.GetAllNodesInfo().values()
12843
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12844
    tgts.extend(("/nodegroup/%s" % n.name, n)
12845
                for n in cfg.GetAllNodeGroupsInfo().values())
12846
    results = []
12847
    for path, target in tgts:
12848
      for tag in target.GetTags():
12849
        if self.re.search(tag):
12850
          results.append((path, tag))
12851
    return results
12852

    
12853

    
12854
class LUTagsSet(TagsLU):
12855
  """Sets a tag on a given object.
12856

12857
  """
12858
  REQ_BGL = False
12859

    
12860
  def CheckPrereq(self):
12861
    """Check prerequisites.
12862

12863
    This checks the type and length of the tag name and value.
12864

12865
    """
12866
    TagsLU.CheckPrereq(self)
12867
    for tag in self.op.tags:
12868
      objects.TaggableObject.ValidateTag(tag)
12869

    
12870
  def Exec(self, feedback_fn):
12871
    """Sets the tag.
12872

12873
    """
12874
    try:
12875
      for tag in self.op.tags:
12876
        self.target.AddTag(tag)
12877
    except errors.TagError, err:
12878
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
12879
    self.cfg.Update(self.target, feedback_fn)
12880

    
12881

    
12882
class LUTagsDel(TagsLU):
12883
  """Delete a list of tags from a given object.
12884

12885
  """
12886
  REQ_BGL = False
12887

    
12888
  def CheckPrereq(self):
12889
    """Check prerequisites.
12890

12891
    This checks that we have the given tag.
12892

12893
    """
12894
    TagsLU.CheckPrereq(self)
12895
    for tag in self.op.tags:
12896
      objects.TaggableObject.ValidateTag(tag)
12897
    del_tags = frozenset(self.op.tags)
12898
    cur_tags = self.target.GetTags()
12899

    
12900
    diff_tags = del_tags - cur_tags
12901
    if diff_tags:
12902
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
12903
      raise errors.OpPrereqError("Tag(s) %s not found" %
12904
                                 (utils.CommaJoin(diff_names), ),
12905
                                 errors.ECODE_NOENT)
12906

    
12907
  def Exec(self, feedback_fn):
12908
    """Remove the tag from the object.
12909

12910
    """
12911
    for tag in self.op.tags:
12912
      self.target.RemoveTag(tag)
12913
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
12965
  """Utility LU to test some aspects of the job queue.
12966

12967
  """
12968
  REQ_BGL = False
12969

    
12970
  # Must be lower than default timeout for WaitForJobChange to see whether it
12971
  # notices changed jobs
12972
  _CLIENT_CONNECT_TIMEOUT = 20.0
12973
  _CLIENT_CONFIRM_TIMEOUT = 60.0
12974

    
12975
  @classmethod
12976
  def _NotifyUsingSocket(cls, cb, errcls):
12977
    """Opens a Unix socket and waits for another program to connect.
12978

12979
    @type cb: callable
12980
    @param cb: Callback to send socket name to client
12981
    @type errcls: class
12982
    @param errcls: Exception class to use for errors
12983

12984
    """
12985
    # Using a temporary directory as there's no easy way to create temporary
12986
    # sockets without writing a custom loop around tempfile.mktemp and
12987
    # socket.bind
12988
    tmpdir = tempfile.mkdtemp()
12989
    try:
12990
      tmpsock = utils.PathJoin(tmpdir, "sock")
12991

    
12992
      logging.debug("Creating temporary socket at %s", tmpsock)
12993
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12994
      try:
12995
        sock.bind(tmpsock)
12996
        sock.listen(1)
12997

    
12998
        # Send details to client
12999
        cb(tmpsock)
13000

    
13001
        # Wait for client to connect before continuing
13002
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13003
        try:
13004
          (conn, _) = sock.accept()
13005
        except socket.error, err:
13006
          raise errcls("Client didn't connect in time (%s)" % err)
13007
      finally:
13008
        sock.close()
13009
    finally:
13010
      # Remove as soon as client is connected
13011
      shutil.rmtree(tmpdir)
13012

    
13013
    # Wait for client to close
13014
    try:
13015
      try:
13016
        # pylint: disable=E1101
13017
        # Instance of '_socketobject' has no ... member
13018
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13019
        conn.recv(1)
13020
      except socket.error, err:
13021
        raise errcls("Client failed to confirm notification (%s)" % err)
13022
    finally:
13023
      conn.close()
13024

    
13025
  def _SendNotification(self, test, arg, sockname):
13026
    """Sends a notification to the client.
13027

13028
    @type test: string
13029
    @param test: Test name
13030
    @param arg: Test argument (depends on test)
13031
    @type sockname: string
13032
    @param sockname: Socket path
13033

13034
    """
13035
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13036

    
13037
  def _Notify(self, prereq, test, arg):
13038
    """Notifies the client of a test.
13039

13040
    @type prereq: bool
13041
    @param prereq: Whether this is a prereq-phase test
13042
    @type test: string
13043
    @param test: Test name
13044
    @param arg: Test argument (depends on test)
13045

13046
    """
13047
    if prereq:
13048
      errcls = errors.OpPrereqError
13049
    else:
13050
      errcls = errors.OpExecError
13051

    
13052
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13053
                                                  test, arg),
13054
                                   errcls)
13055

    
13056
  def CheckArguments(self):
13057
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13058
    self.expandnames_calls = 0
13059

    
13060
  def ExpandNames(self):
13061
    checkargs_calls = getattr(self, "checkargs_calls", 0)
13062
    if checkargs_calls < 1:
13063
      raise errors.ProgrammerError("CheckArguments was not called")
13064

    
13065
    self.expandnames_calls += 1
13066

    
13067
    if self.op.notify_waitlock:
13068
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
13069

    
13070
    self.LogInfo("Expanding names")
13071

    
13072
    # Get lock on master node (just to get a lock, not for a particular reason)
13073
    self.needed_locks = {
13074
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13075
      }
13076

    
13077
  def Exec(self, feedback_fn):
13078
    if self.expandnames_calls < 1:
13079
      raise errors.ProgrammerError("ExpandNames was not called")
13080

    
13081
    if self.op.notify_exec:
13082
      self._Notify(False, constants.JQT_EXEC, None)
13083

    
13084
    self.LogInfo("Executing")
13085

    
13086
    if self.op.log_messages:
13087
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13088
      for idx, msg in enumerate(self.op.log_messages):
13089
        self.LogInfo("Sending log message %s", idx + 1)
13090
        feedback_fn(constants.JQT_MSGPREFIX + msg)
13091
        # Report how many test messages have been sent
13092
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13093

    
13094
    if self.op.fail:
13095
      raise errors.OpExecError("Opcode failure was requested")
13096

    
13097
    return True
13098

    
13099

    
13100
class IAllocator(object):
13101
  """IAllocator framework.
13102

13103
  An IAllocator instance has the following sets of attributes:
13104
    - cfg that is needed to query the cluster
13105
    - input data (all members of the _KEYS class attribute are required)
13106
    - four buffer attributes (in|out_data|text), that represent the
13107
      input (to the external script) in text and data structure format,
13108
      and the output from it, again in two formats
13109
    - the result variables from the script (success, info, nodes) for
13110
      easy usage
13111

13112
  """
13113
  # pylint: disable=R0902
13114
  # lots of instance attributes
13115

    
13116
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
13117
    self.cfg = cfg
13118
    self.rpc = rpc_runner
13119
    # init buffer variables
13120
    self.in_text = self.out_text = self.in_data = self.out_data = None
13121
    # init all input fields so that pylint is happy
13122
    self.mode = mode
13123
    self.memory = self.disks = self.disk_template = None
13124
    self.os = self.tags = self.nics = self.vcpus = None
13125
    self.hypervisor = None
13126
    self.relocate_from = None
13127
    self.name = None
13128
    self.instances = None
13129
    self.evac_mode = None
13130
    self.target_groups = []
13131
    # computed fields
13132
    self.required_nodes = None
13133
    # init result fields
13134
    self.success = self.info = self.result = None
13135

    
13136
    try:
13137
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13138
    except KeyError:
13139
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13140
                                   " IAllocator" % self.mode)
13141

    
13142
    keyset = [n for (n, _) in keydata]
13143

    
13144
    for key in kwargs:
13145
      if key not in keyset:
13146
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
13147
                                     " IAllocator" % key)
13148
      setattr(self, key, kwargs[key])
13149

    
13150
    for key in keyset:
13151
      if key not in kwargs:
13152
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13153
                                     " IAllocator" % key)
13154
    self._BuildInputData(compat.partial(fn, self), keydata)
13155

    
13156
  def _ComputeClusterData(self):
13157
    """Compute the generic allocator input data.
13158

13159
    This is the data that is independent of the actual operation.
13160

13161
    """
13162
    cfg = self.cfg
13163
    cluster_info = cfg.GetClusterInfo()
13164
    # cluster data
13165
    data = {
13166
      "version": constants.IALLOCATOR_VERSION,
13167
      "cluster_name": cfg.GetClusterName(),
13168
      "cluster_tags": list(cluster_info.GetTags()),
13169
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13170
      # we don't have job IDs
13171
      }
13172
    ninfo = cfg.GetAllNodesInfo()
13173
    iinfo = cfg.GetAllInstancesInfo().values()
13174
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
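    # Pair each instance with its backend parameters as filled in from the
    # cluster defaults; these BE values drive the memory accounting below.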
13175

    
13176
    # node data
13177
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
13178

    
13179
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13180
      hypervisor_name = self.hypervisor
13181
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13182
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13183
    else:
13184
      hypervisor_name = cluster_info.enabled_hypervisors[0]
13185

    
13186
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13187
                                        hypervisor_name)
13188
    node_iinfo = \
13189
      self.rpc.call_all_instances_info(node_list,
13190
                                       cluster_info.enabled_hypervisors)
13191

    
13192
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13193

    
13194
    config_ndata = self._ComputeBasicNodeData(ninfo)
13195
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13196
                                                 i_list, config_ndata)
13197
    assert len(data["nodes"]) == len(ninfo), \
13198
        "Incomplete node data computed"
13199

    
13200
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13201

    
13202
    self.in_data = data
13203

    
13204
  @staticmethod
13205
  def _ComputeNodeGroupData(cfg):
13206
    """Compute node groups data.
13207

13208
    """
13209
    ng = dict((guuid, {
13210
      "name": gdata.name,
13211
      "alloc_policy": gdata.alloc_policy,
13212
      })
13213
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13214

    
13215
    return ng
13216

    
13217
  @staticmethod
13218
  def _ComputeBasicNodeData(node_cfg):
13219
    """Compute global node data.
13220

13221
    @rtype: dict
13222
    @returns: a dict of name: (node dict, node config)
13223

13224
    """
13225
    # fill in static (config-based) values
13226
    node_results = dict((ninfo.name, {
13227
      "tags": list(ninfo.GetTags()),
13228
      "primary_ip": ninfo.primary_ip,
13229
      "secondary_ip": ninfo.secondary_ip,
13230
      "offline": ninfo.offline,
13231
      "drained": ninfo.drained,
13232
      "master_candidate": ninfo.master_candidate,
13233
      "group": ninfo.group,
13234
      "master_capable": ninfo.master_capable,
13235
      "vm_capable": ninfo.vm_capable,
13236
      })
13237
      for ninfo in node_cfg.values())
13238

    
13239
    return node_results
13240

    
13241
  @staticmethod
13242
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13243
                              node_results):
13244
    """Compute global node data.
13245

13246
    @param node_results: the basic node structures as filled from the config
13247

13248
    """
13249
    # make a copy of the current dict
13250
    node_results = dict(node_results)
13251
    for nname, nresult in node_data.items():
13252
      assert nname in node_results, "Missing basic data for node %s" % nname
13253
      ninfo = node_cfg[nname]
13254

    
13255
      if not (ninfo.offline or ninfo.drained):
13256
        nresult.Raise("Can't get data for node %s" % nname)
13257
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13258
                                nname)
13259
        remote_info = nresult.payload
13260

    
13261
        for attr in ["memory_total", "memory_free", "memory_dom0",
13262
                     "vg_size", "vg_free", "cpu_total"]:
13263
          if attr not in remote_info:
13264
            raise errors.OpExecError("Node '%s' didn't return attribute"
13265
                                     " '%s'" % (nname, attr))
13266
          if not isinstance(remote_info[attr], int):
13267
            raise errors.OpExecError("Node '%s' returned invalid value"
13268
                                     " for '%s': %s" %
13269
                                     (nname, attr, remote_info[attr]))
13270
        # compute memory used by primary instances
13271
        i_p_mem = i_p_up_mem = 0
13272
        for iinfo, beinfo in i_list:
13273
          if iinfo.primary_node == nname:
13274
            i_p_mem += beinfo[constants.BE_MEMORY]
13275
            if iinfo.name not in node_iinfo[nname].payload:
13276
              i_used_mem = 0
13277
            else:
13278
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13279
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13280
            remote_info["memory_free"] -= max(0, i_mem_diff)
13281

    
13282
            if iinfo.admin_up:
13283
              i_p_up_mem += beinfo[constants.BE_MEMORY]
13284

    
13285
        # compute memory used by instances
13286
        pnr_dyn = {
13287
          "total_memory": remote_info["memory_total"],
13288
          "reserved_memory": remote_info["memory_dom0"],
13289
          "free_memory": remote_info["memory_free"],
13290
          "total_disk": remote_info["vg_size"],
13291
          "free_disk": remote_info["vg_free"],
13292
          "total_cpus": remote_info["cpu_total"],
13293
          "i_pri_memory": i_p_mem,
13294
          "i_pri_up_memory": i_p_up_mem,
13295
          }
13296
        pnr_dyn.update(node_results[nname])
13297
        node_results[nname] = pnr_dyn
13298

    
13299
    return node_results
13300

    
13301
  @staticmethod
13302
  def _ComputeInstanceData(cluster_info, i_list):
13303
    """Compute global instance data.
13304

13305
    """
13306
    instance_data = {}
13307
    for iinfo, beinfo in i_list:
13308
      nic_data = []
13309
      for nic in iinfo.nics:
13310
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13311
        nic_dict = {
13312
          "mac": nic.mac,
13313
          "ip": nic.ip,
13314
          "mode": filled_params[constants.NIC_MODE],
13315
          "link": filled_params[constants.NIC_LINK],
13316
          }
13317
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13318
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13319
        nic_data.append(nic_dict)
13320
      pir = {
13321
        "tags": list(iinfo.GetTags()),
13322
        "admin_up": iinfo.admin_up,
13323
        "vcpus": beinfo[constants.BE_VCPUS],
13324
        "memory": beinfo[constants.BE_MEMORY],
13325
        "os": iinfo.os,
13326
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13327
        "nics": nic_data,
13328
        "disks": [{constants.IDISK_SIZE: dsk.size,
13329
                   constants.IDISK_MODE: dsk.mode}
13330
                  for dsk in iinfo.disks],
13331
        "disk_template": iinfo.disk_template,
13332
        "hypervisor": iinfo.hypervisor,
13333
        }
13334
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13335
                                                 pir["disks"])
13336
      instance_data[iinfo.name] = pir
13337

    
13338
    return instance_data
13339

    
13340
  def _AddNewInstance(self):
13341
    """Add new instance data to allocator structure.
13342

13343
    This in combination with _ComputeClusterData will create the
13344
    correct structure needed as input for the allocator.
13345

13346
    The checks for the completeness of the opcode must have already been
13347
    done.
13348

13349
    """
13350
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13351

    
13352
    if self.disk_template in constants.DTS_INT_MIRROR:
13353
      self.required_nodes = 2
13354
    else:
13355
      self.required_nodes = 1
13356

    
13357
    request = {
13358
      "name": self.name,
13359
      "disk_template": self.disk_template,
13360
      "tags": self.tags,
13361
      "os": self.os,
13362
      "vcpus": self.vcpus,
13363
      "memory": self.memory,
13364
      "disks": self.disks,
13365
      "disk_space_total": disk_space,
13366
      "nics": self.nics,
13367
      "required_nodes": self.required_nodes,
13368
      "hypervisor": self.hypervisor,
13369
      }
13370

    
13371
    return request
13372

    
13373
  def _AddRelocateInstance(self):
13374
    """Add relocate instance data to allocator structure.
13375

13376
    This in combination with _ComputeClusterData will create the
13377
    correct structure needed as input for the allocator.
13378

13379
    The checks for the completeness of the opcode must have already been
13380
    done.
13381

13382
    """
13383
    instance = self.cfg.GetInstanceInfo(self.name)
13384
    if instance is None:
13385
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13386
                                   " IAllocator" % self.name)
13387

    
13388
    if instance.disk_template not in constants.DTS_MIRRORED:
13389
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13390
                                 errors.ECODE_INVAL)
13391

    
13392
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13393
        len(instance.secondary_nodes) != 1:
13394
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13395
                                 errors.ECODE_STATE)
13396

    
13397
    self.required_nodes = 1
13398
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13399
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13400

    
13401
    request = {
13402
      "name": self.name,
13403
      "disk_space_total": disk_space,
13404
      "required_nodes": self.required_nodes,
13405
      "relocate_from": self.relocate_from,
13406
      }
13407
    return request
13408

    
13409
  def _AddNodeEvacuate(self):
13410
    """Get data for node-evacuate requests.
13411

13412
    """
13413
    return {
13414
      "instances": self.instances,
13415
      "evac_mode": self.evac_mode,
13416
      }
13417

    
13418
  def _AddChangeGroup(self):
13419
    """Get data for node-evacuate requests.
13420

13421
    """
13422
    return {
13423
      "instances": self.instances,
13424
      "target_groups": self.target_groups,
13425
      }
13426

    
13427
  def _BuildInputData(self, fn, keydata):
13428
    """Build input data structures.
13429

13430
    """
13431
    self._ComputeClusterData()
13432

    
13433
    request = fn()
13434
    request["type"] = self.mode
13435
    for keyname, keytype in keydata:
13436
      if keyname not in request:
13437
        raise errors.ProgrammerError("Request parameter %s is missing" %
13438
                                     keyname)
13439
      val = request[keyname]
13440
      if not keytype(val):
13441
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13442
                                     " validation, value %s, expected"
13443
                                     " type %s" % (keyname, val, keytype))
13444
    self.in_data["request"] = request
13445

    
13446
    self.in_text = serializer.Dump(self.in_data)
13447

    
13448
  _STRING_LIST = ht.TListOf(ht.TString)
13449
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13450
     # pylint: disable=E1101
13451
     # Class '...' has no 'OP_ID' member
13452
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13453
                          opcodes.OpInstanceMigrate.OP_ID,
13454
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13455
     })))
13456

    
13457
  _NEVAC_MOVED = \
13458
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13459
                       ht.TItems([ht.TNonEmptyString,
13460
                                  ht.TNonEmptyString,
13461
                                  ht.TListOf(ht.TNonEmptyString),
13462
                                 ])))
13463
  _NEVAC_FAILED = \
13464
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13465
                       ht.TItems([ht.TNonEmptyString,
13466
                                  ht.TMaybeString,
13467
                                 ])))
13468
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13469
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
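  # A node-evacuation result is a three-tuple: the successfully moved
  # instances, the failed instances (with an optional error message) and the
  # jobs to be submitted to carry out the moves.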
13470

    
13471
  _MODE_DATA = {
13472
    constants.IALLOCATOR_MODE_ALLOC:
13473
      (_AddNewInstance,
13474
       [
13475
        ("name", ht.TString),
13476
        ("memory", ht.TInt),
13477
        ("disks", ht.TListOf(ht.TDict)),
13478
        ("disk_template", ht.TString),
13479
        ("os", ht.TString),
13480
        ("tags", _STRING_LIST),
13481
        ("nics", ht.TListOf(ht.TDict)),
13482
        ("vcpus", ht.TInt),
13483
        ("hypervisor", ht.TString),
13484
        ], ht.TList),
13485
    constants.IALLOCATOR_MODE_RELOC:
13486
      (_AddRelocateInstance,
13487
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13488
       ht.TList),
13489
     constants.IALLOCATOR_MODE_NODE_EVAC:
13490
      (_AddNodeEvacuate, [
13491
        ("instances", _STRING_LIST),
13492
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13493
        ], _NEVAC_RESULT),
13494
     constants.IALLOCATOR_MODE_CHG_GROUP:
13495
      (_AddChangeGroup, [
13496
        ("instances", _STRING_LIST),
13497
        ("target_groups", _STRING_LIST),
13498
        ], _NEVAC_RESULT),
13499
    }
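  # Each entry above maps an allocator mode to (request builder method, list
  # of (input key, validator) pairs, validator for the script's "result");
  # __init__ checks the keyword arguments against the key list and
  # _ValidateResult applies the result validator.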
13500

    
13501
  def Run(self, name, validate=True, call_fn=None):
13502
    """Run an instance allocator and return the results.
13503

13504
    """
13505
    if call_fn is None:
13506
      call_fn = self.rpc.call_iallocator_runner
13507

    
13508
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13509
    result.Raise("Failure while running the iallocator script")
13510

    
13511
    self.out_text = result.payload
13512
    if validate:
13513
      self._ValidateResult()
13514

    
13515
  def _ValidateResult(self):
13516
    """Process the allocator results.
13517

13518
    This will process and if successful save the result in
13519
    self.out_data and the other parameters.
13520

13521
    """
13522
    try:
13523
      rdict = serializer.Load(self.out_text)
13524
    except Exception, err:
13525
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13526

    
13527
    if not isinstance(rdict, dict):
13528
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
13529

    
13530
    # TODO: remove backwards compatibility in later versions
13531
    if "nodes" in rdict and "result" not in rdict:
13532
      rdict["result"] = rdict["nodes"]
13533
      del rdict["nodes"]
13534

    
13535
    for key in "success", "info", "result":
13536
      if key not in rdict:
13537
        raise errors.OpExecError("Can't parse iallocator results:"
13538
                                 " missing key '%s'" % key)
13539
      setattr(self, key, rdict[key])
13540

    
13541
    if not self._result_check(self.result):
13542
      raise errors.OpExecError("Iallocator returned invalid result,"
13543
                               " expected %s, got %s" %
13544
                               (self._result_check, self.result),
13545
                               errors.ECODE_INVAL)
13546

    
13547
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
13548
      assert self.relocate_from is not None
13549
      assert self.required_nodes == 1
13550

    
13551
      node2group = dict((name, ndata["group"])
13552
                        for (name, ndata) in self.in_data["nodes"].items())
13553

    
13554
      fn = compat.partial(self._NodesToGroups, node2group,
13555
                          self.in_data["nodegroups"])
13556

    
13557
      instance = self.cfg.GetInstanceInfo(self.name)
13558
      request_groups = fn(self.relocate_from + [instance.primary_node])
13559
      result_groups = fn(rdict["result"] + [instance.primary_node])
13560

    
13561
      if self.success and not set(result_groups).issubset(request_groups):
13562
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13563
                                 " differ from original groups (%s)" %
13564
                                 (utils.CommaJoin(result_groups),
13565
                                  utils.CommaJoin(request_groups)))
13566

    
13567
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13568
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13569

    
13570
    self.out_data = rdict
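
  # Editorial sketch, not part of the original code: the smallest reply an
  # allocator script could emit and still pass _ValidateResult() for a plain
  # allocation is a JSON document along these lines (node name made up):
  #
  #   {"success": true, "info": "", "result": ["node1.example.com"]}
  #
  # A legacy "nodes" key is still accepted in place of "result", as handled
  # above.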

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
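

# Editorial sketch, not part of the original module: a tiny, self-contained
# illustration of what IAllocator._NodesToGroups() computes.  The node and
# group names are hypothetical; unknown nodes are silently skipped and a
# missing group falls back to its UUID.
def _ExampleNodesToGroups():
  """Demonstrates the IAllocator._NodesToGroups helper (illustration only).

  """
  node2group = {
    "node1.example.com": "uuid-a",
    "node2.example.com": "uuid-a",
    "node3.example.com": "uuid-b",
    }
  groups = {
    "uuid-a": {"name": "default"},
    # "uuid-b" is deliberately missing to show the UUID fallback
    }
  names = IAllocator._NodesToGroups(node2group, groups,
                                    ["node1.example.com",
                                     "node3.example.com",
                                     "ghost.example.com"])
  assert names == ["default", "uuid-b"]
  return names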


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
                                 self.op.direction, errors.ECODE_INVAL)
13665

    
13666
  def Exec(self, feedback_fn):
13667
    """Run the allocator test.
13668

13669
    """
13670
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13671
      ial = IAllocator(self.cfg, self.rpc,
13672
                       mode=self.op.mode,
13673
                       name=self.op.name,
13674
                       memory=self.op.memory,
13675
                       disks=self.op.disks,
13676
                       disk_template=self.op.disk_template,
13677
                       os=self.op.os,
13678
                       tags=self.op.tags,
13679
                       nics=self.op.nics,
13680
                       vcpus=self.op.vcpus,
13681
                       hypervisor=self.op.hypervisor,
13682
                       )
13683
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13684
      ial = IAllocator(self.cfg, self.rpc,
13685
                       mode=self.op.mode,
13686
                       name=self.op.name,
13687
                       relocate_from=list(self.relocate_from),
13688
                       )
13689
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13690
      ial = IAllocator(self.cfg, self.rpc,
13691
                       mode=self.op.mode,
13692
                       instances=self.op.instances,
13693
                       target_groups=self.op.target_groups)
13694
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13695
      ial = IAllocator(self.cfg, self.rpc,
13696
                       mode=self.op.mode,
13697
                       instances=self.op.instances,
13698
                       evac_mode=self.op.evac_mode)
13699
    else:
13700
      raise errors.ProgrammerError("Uncatched mode %s in"
13701
                                   " LUTestAllocator.Exec", self.op.mode)
13702

    
13703
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
13704
      result = ial.in_text
13705
    else:
13706
      ial.Run(self.op.allocator, validate=False)
13707
      result = ial.out_text
13708
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
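

# Editorial usage sketch, not part of the original module: the hypothetical
# helper below only illustrates the lookup contract of
# _GetQueryImplementation -- a known resource name yields the implementing
# class, anything else raises OpPrereqError with ECODE_INVAL.
def _ExampleResolveQueryImpl(name):
  """Returns the query implementation for C{name}, or None if unknown.

  """
  try:
    return _GetQueryImplementation(name)
  except errors.OpPrereqError:
    return None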