
root / lib / cmdlib.py @ a4e588b7


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62
from ganeti import rpc
63

    
64
import ganeti.masterd.instance # pylint: disable=W0611
65

    
66

    
67
#: Size of DRBD meta block device
68
DRBD_META_SIZE = 128
69

    
70

    
71
class ResultWithJobs:
72
  """Data container for LU results with jobs.
73

74
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
75
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
76
  contained in the C{jobs} attribute and include the job IDs in the opcode
77
  result.
78

79
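  Example (an illustrative sketch only; the opcode used here is arbitrary)::

    # submit one job consisting of a single opcode and return an extra value
    return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]], custom="value")
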
  """
80
  def __init__(self, jobs, **kwargs):
81
    """Initializes this class.
82

83
    Additional return values can be specified as keyword arguments.
84

85
    @type jobs: list of lists of L{opcodes.OpCode}
86
    @param jobs: A list of lists of opcode objects
87

88
    """
89
    self.jobs = jobs
90
    self.other = kwargs
91

    
92

    
93
class LogicalUnit(object):
94
  """Logical Unit base class.
95

96
  Subclasses must follow these rules:
97
    - implement ExpandNames
98
    - implement CheckPrereq (except when tasklets are used)
99
    - implement Exec (except when tasklets are used)
100
    - implement BuildHooksEnv
101
    - implement BuildHooksNodes
102
    - redefine HPATH and HTYPE
103
    - optionally redefine their run requirements:
104
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
105

106
  Note that all commands require root permissions.
107

108
  @ivar dry_run_result: the value (if any) that will be returned to the caller
109
      in dry-run mode (signalled by opcode dry_run parameter)
110

111
  """
112
  HPATH = None
113
  HTYPE = None
114
  REQ_BGL = True
115

    
116
  def __init__(self, processor, op, context, rpc_runner):
117
    """Constructor for LogicalUnit.
118

119
    This needs to be overridden in derived classes in order to check op
120
    validity.
121

122
    """
123
    self.proc = processor
124
    self.op = op
125
    self.cfg = context.cfg
126
    self.glm = context.glm
127
    # readability alias
128
    self.owned_locks = context.glm.list_owned
129
    self.context = context
130
    self.rpc = rpc_runner
131
    # Dicts used to declare locking needs to mcpu
132
    self.needed_locks = None
133
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
134
    self.add_locks = {}
135
    self.remove_locks = {}
136
    # Used to force good behavior when calling helper functions
137
    self.recalculate_locks = {}
138
    # logging
139
    self.Log = processor.Log # pylint: disable=C0103
140
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
141
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
142
    self.LogStep = processor.LogStep # pylint: disable=C0103
143
    # support for dry-run
144
    self.dry_run_result = None
145
    # support for generic debug attribute
146
    if (not hasattr(self.op, "debug_level") or
147
        not isinstance(self.op.debug_level, int)):
148
      self.op.debug_level = 0
149

    
150
    # Tasklets
151
    self.tasklets = None
152

    
153
    # Validate opcode parameters and set defaults
154
    self.op.Validate(True)
155

    
156
    self.CheckArguments()
157

    
158
  def CheckArguments(self):
159
    """Check syntactic validity for the opcode arguments.
160

161
    This method is for doing a simple syntactic check and ensuring
162
    validity of opcode parameters, without any cluster-related
163
    checks. While the same can be accomplished in ExpandNames and/or
164
    CheckPrereq, doing these separately is better because:
165

166
      - ExpandNames is left as purely a lock-related function
167
      - CheckPrereq is run after we have acquired locks (and possibly
168
        waited for them)
169

170
    The function is allowed to change the self.op attribute so that
171
    later methods need not worry about missing parameters.
172

173
    """
174
    pass
175

    
176
  def ExpandNames(self):
177
    """Expand names for this LU.
178

179
    This method is called before starting to execute the opcode, and it should
180
    update all the parameters of the opcode to their canonical form (e.g. a
181
    short node name must be fully expanded after this method has successfully
182
    completed). This way locking, hooks, logging, etc. can work correctly.
183

184
    LUs which implement this method must also populate the self.needed_locks
185
    member, as a dict with lock levels as keys, and a list of needed lock names
186
    as values. Rules:
187

188
      - use an empty dict if you don't need any lock
189
      - if you don't need any lock at a particular level omit that level
190
      - don't put anything for the BGL level
191
      - if you want all locks at a level use locking.ALL_SET as a value
192

193
    If you need to share locks (rather than acquire them exclusively) at one
194
    level you can modify self.share_locks, setting a true value (usually 1) for
195
    that level. By default locks are not shared.
196

197
    This function can also define a list of tasklets, which then will be
198
    executed in order instead of the usual LU-level CheckPrereq and Exec
199
    functions, if those are not defined by the LU.
200

201
    Examples::
202

203
      # Acquire all nodes and one instance
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: locking.ALL_SET,
206
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
207
      }
208
      # Acquire just two nodes
209
      self.needed_locks = {
210
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
211
      }
212
      # Acquire no locks
213
      self.needed_locks = {} # No, you can't leave it to the default value None
214

215
    """
216
    # The implementation of this method is mandatory only if the new LU is
217
    # concurrent, so that old LUs don't need to be changed all at the same
218
    # time.
219
    if self.REQ_BGL:
220
      self.needed_locks = {} # Exclusive LUs don't need locks.
221
    else:
222
      raise NotImplementedError
223

    
224
  def DeclareLocks(self, level):
225
    """Declare LU locking needs for a level
226

227
    While most LUs can just declare their locking needs at ExpandNames time,
228
    sometimes there's the need to calculate some locks after having acquired
229
    the ones before. This function is called just before acquiring locks at a
230
    particular level, but after acquiring the ones at lower levels, and permits
231
    such calculations. It can be used to modify self.needed_locks, and by
232
    default it does nothing.
233

234
    This function is only called if you have something already set in
235
    self.needed_locks for the level.
236

237
    @param level: Locking level which is going to be locked
238
    @type level: member of ganeti.locking.LEVELS
239

240
    """
241

    
242
  def CheckPrereq(self):
243
    """Check prerequisites for this LU.
244

245
    This method should check that the prerequisites for the execution
246
    of this LU are fulfilled. It can do internode communication, but
247
    it should be idempotent - no cluster or system changes are
248
    allowed.
249

250
    The method should raise errors.OpPrereqError in case something is
251
    not fulfilled. Its return value is ignored.
252

253
    This method should also update all the parameters of the opcode to
254
    their canonical form if it hasn't been done by ExpandNames before.
255

256
    """
257
    if self.tasklets is not None:
258
      for (idx, tl) in enumerate(self.tasklets):
259
        logging.debug("Checking prerequisites for tasklet %s/%s",
260
                      idx + 1, len(self.tasklets))
261
        tl.CheckPrereq()
262
    else:
263
      pass
264

    
265
  def Exec(self, feedback_fn):
266
    """Execute the LU.
267

268
    This method should implement the actual work. It should raise
269
    errors.OpExecError for failures that are somewhat dealt with in
270
    code, or expected.
271

272
    """
273
    if self.tasklets is not None:
274
      for (idx, tl) in enumerate(self.tasklets):
275
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
276
        tl.Exec(feedback_fn)
277
    else:
278
      raise NotImplementedError
279

    
280
  def BuildHooksEnv(self):
281
    """Build hooks environment for this LU.
282

283
    @rtype: dict
284
    @return: Dictionary containing the environment that will be used for
285
      running the hooks for this LU. The keys of the dict must not be prefixed
286
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
287
      will extend the environment with additional variables. If no environment
288
      should be defined, an empty dictionary should be returned (not C{None}).
289
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
290
      will not be called.
291

292
    """
293
    raise NotImplementedError
294

    
295
  def BuildHooksNodes(self):
296
    """Build list of nodes to run LU's hooks.
297

298
    @rtype: tuple; (list, list)
299
    @return: Tuple containing a list of node names on which the hook
300
      should run before the execution and a list of node names on which the
301
      hook should run after the execution. No nodes should be returned as an
302
      empty list (and not None).
303
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
304
      will not be called.
305

306
    """
307
    raise NotImplementedError
308

    
309
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
310
    """Notify the LU about the results of its hooks.
311

312
    This method is called every time a hooks phase is executed, and notifies
313
    the Logical Unit about the hooks' result. The LU can then use it to alter
314
    its result based on the hooks.  By default the method does nothing and the
315
    previous result is passed back unchanged but any LU can define it if it
316
    wants to use the local cluster hook-scripts somehow.
317

318
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
319
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
320
    @param hook_results: the results of the multi-node hooks rpc call
321
    @param feedback_fn: function used to send feedback back to the caller
322
    @param lu_result: the previous Exec result this LU had, or None
323
        in the PRE phase
324
    @return: the new Exec result, based on the previous result
325
        and hook results
326

327
    """
328
    # API must be kept, thus we ignore the unused argument and the 'could
329
    # be a function' warnings
330
    # pylint: disable=W0613,R0201
331
    return lu_result
332

    
333
  def _ExpandAndLockInstance(self):
334
    """Helper function to expand and lock an instance.
335

336
    Many LUs that work on an instance take its name in self.op.instance_name
337
    and need to expand it and then declare the expanded name for locking. This
338
    function does it, and then updates self.op.instance_name to the expanded
339
    name. It also initializes needed_locks as a dict, if this hasn't been done
340
    before.
341

342
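    A typical (purely illustrative) ExpandNames of such an LU boils down to::

      def ExpandNames(self):
        self._ExpandAndLockInstance()
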
    """
343
    if self.needed_locks is None:
344
      self.needed_locks = {}
345
    else:
346
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
347
        "_ExpandAndLockInstance called with instance-level locks set"
348
    self.op.instance_name = _ExpandInstanceName(self.cfg,
349
                                                self.op.instance_name)
350
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
351

    
352
  def _LockInstancesNodes(self, primary_only=False,
353
                          level=locking.LEVEL_NODE):
354
    """Helper function to declare instances' nodes for locking.
355

356
    This function should be called after locking one or more instances to lock
357
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
358
    with all primary or secondary nodes for instances already locked and
359
    present in self.needed_locks[locking.LEVEL_INSTANCE].
360

361
    It should be called from DeclareLocks, and for safety only works if
362
    self.recalculate_locks[locking.LEVEL_NODE] is set.
363

364
    In the future it may grow parameters to just lock some instances' nodes, or
365
    to just lock primary or secondary nodes, if needed.
366

367
    It should be called from DeclareLocks in a way similar to::
368

369
      if level == locking.LEVEL_NODE:
370
        self._LockInstancesNodes()
371

372
    @type primary_only: boolean
373
    @param primary_only: only lock primary nodes of locked instances
374
    @param level: Which lock level to use for locking nodes
375

376
    """
377
    assert level in self.recalculate_locks, \
378
      "_LockInstancesNodes helper function called with no nodes to recalculate"
379

    
380
    # TODO: check if we've really been called with the instance locks held
381

    
382
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
383
    # future we might want to have different behaviors depending on the value
384
    # of self.recalculate_locks[locking.LEVEL_NODE]
385
    wanted_nodes = []
386
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
387
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
388
      wanted_nodes.append(instance.primary_node)
389
      if not primary_only:
390
        wanted_nodes.extend(instance.secondary_nodes)
391

    
392
    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
393
      self.needed_locks[level] = wanted_nodes
394
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
395
      self.needed_locks[level].extend(wanted_nodes)
396
    else:
397
      raise errors.ProgrammerError("Unknown recalculation mode")
398

    
399
    del self.recalculate_locks[level]
400

    
401

    
402
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
403
  """Simple LU which runs no hooks.
404

405
  This LU is intended as a parent for other LogicalUnits which will
406
  run no hooks, in order to reduce duplicate code.
407

408
  """
409
  HPATH = None
410
  HTYPE = None
411

    
412
  def BuildHooksEnv(self):
413
    """Empty BuildHooksEnv for NoHooksLu.
414

415
    This just raises an error.
416

417
    """
418
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
419

    
420
  def BuildHooksNodes(self):
421
    """Empty BuildHooksNodes for NoHooksLU.
422

423
    """
424
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
425

    
426

    
427
class Tasklet:
428
  """Tasklet base class.
429

430
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
431
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
432
  tasklets know nothing about locks.
433

434
  Subclasses must follow these rules:
435
    - Implement CheckPrereq
436
    - Implement Exec
437

438
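  A minimal tasklet, sketched here only for illustration, would look like::

    class _MyTasklet(Tasklet):
      def CheckPrereq(self):
        pass # nothing to verify in this sketch

      def Exec(self, feedback_fn):
        feedback_fn("doing the actual work here")
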
  """
439
  def __init__(self, lu):
440
    self.lu = lu
441

    
442
    # Shortcuts
443
    self.cfg = lu.cfg
444
    self.rpc = lu.rpc
445

    
446
  def CheckPrereq(self):
447
    """Check prerequisites for this tasklets.
448

449
    This method should check whether the prerequisites for the execution of
450
    this tasklet are fulfilled. It can do internode communication, but it
451
    should be idempotent - no cluster or system changes are allowed.
452

453
    The method should raise errors.OpPrereqError in case something is not
454
    fulfilled. Its return value is ignored.
455

456
    This method should also update all parameters to their canonical form if it
457
    hasn't been done before.
458

459
    """
460
    pass
461

    
462
  def Exec(self, feedback_fn):
463
    """Execute the tasklet.
464

465
    This method should implement the actual work. It should raise
466
    errors.OpExecError for failures that are somewhat dealt with in code, or
467
    expected.
468

469
    """
470
    raise NotImplementedError
471

    
472

    
473
class _QueryBase:
474
  """Base for query utility classes.
475

476
  """
477
  #: Attribute holding field definitions
478
  FIELDS = None
479

    
480
  def __init__(self, qfilter, fields, use_locking):
481
    """Initializes this class.
482

483
    """
484
    self.use_locking = use_locking
485

    
486
    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
487
                             namefield="name")
488
    self.requested_data = self.query.RequestedData()
489
    self.names = self.query.RequestedNames()
490

    
491
    # Sort only if no names were requested
492
    self.sort_by_name = not self.names
493

    
494
    self.do_locking = None
495
    self.wanted = None
496

    
497
  def _GetNames(self, lu, all_names, lock_level):
498
    """Helper function to determine names asked for in the query.
499

500
    """
501
    if self.do_locking:
502
      names = lu.owned_locks(lock_level)
503
    else:
504
      names = all_names
505

    
506
    if self.wanted == locking.ALL_SET:
507
      assert not self.names
508
      # caller didn't specify names, so ordering is not important
509
      return utils.NiceSort(names)
510

    
511
    # caller specified names and we must keep the same order
512
    assert self.names
513
    assert not self.do_locking or lu.glm.is_owned(lock_level)
514

    
515
    missing = set(self.wanted).difference(names)
516
    if missing:
517
      raise errors.OpExecError("Some items were removed before retrieving"
518
                               " their data: %s" % missing)
519

    
520
    # Return expanded names
521
    return self.wanted
522

    
523
  def ExpandNames(self, lu):
524
    """Expand names for this query.
525

526
    See L{LogicalUnit.ExpandNames}.
527

528
    """
529
    raise NotImplementedError()
530

    
531
  def DeclareLocks(self, lu, level):
532
    """Declare locks for this query.
533

534
    See L{LogicalUnit.DeclareLocks}.
535

536
    """
537
    raise NotImplementedError()
538

    
539
  def _GetQueryData(self, lu):
540
    """Collects all data for this query.
541

542
    @return: Query data object
543

544
    """
545
    raise NotImplementedError()
546

    
547
  def NewStyleQuery(self, lu):
548
    """Collect data and execute query.
549

550
    """
551
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
552
                                  sort_by_name=self.sort_by_name)
553

    
554
  def OldStyleQuery(self, lu):
555
    """Collect data and execute query.
556

557
    """
558
    return self.query.OldStyleQuery(self._GetQueryData(lu),
559
                                    sort_by_name=self.sort_by_name)
560

    
561

    
562
def _ShareAll():
563
  """Returns a dict declaring all lock levels shared.
564

565
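  Typical (illustrative) use from an LU's ExpandNames::

    self.share_locks = _ShareAll()
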
  """
566
  return dict.fromkeys(locking.LEVELS, 1)
567

    
568

    
569
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
570
  """Checks if the owned node groups are still correct for an instance.
571

572
  @type cfg: L{config.ConfigWriter}
573
  @param cfg: The cluster configuration
574
  @type instance_name: string
575
  @param instance_name: Instance name
576
  @type owned_groups: set or frozenset
577
  @param owned_groups: List of currently owned node groups
578

579
  """
580
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
581

    
582
  if not owned_groups.issuperset(inst_groups):
583
    raise errors.OpPrereqError("Instance %s's node groups changed since"
584
                               " locks were acquired, current groups are"
585
                               " are '%s', owning groups '%s'; retry the"
586
                               " operation" %
587
                               (instance_name,
588
                                utils.CommaJoin(inst_groups),
589
                                utils.CommaJoin(owned_groups)),
590
                               errors.ECODE_STATE)
591

    
592
  return inst_groups
593

    
594

    
595
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
596
  """Checks if the instances in a node group are still correct.
597

598
  @type cfg: L{config.ConfigWriter}
599
  @param cfg: The cluster configuration
600
  @type group_uuid: string
601
  @param group_uuid: Node group UUID
602
  @type owned_instances: set or frozenset
603
  @param owned_instances: List of currently owned instances
604

605
  """
606
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
607
  if owned_instances != wanted_instances:
608
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
609
                               " locks were acquired, wanted '%s', have '%s';"
610
                               " retry the operation" %
611
                               (group_uuid,
612
                                utils.CommaJoin(wanted_instances),
613
                                utils.CommaJoin(owned_instances)),
614
                               errors.ECODE_STATE)
615

    
616
  return wanted_instances
617

    
618

    
619
def _SupportsOob(cfg, node):
620
  """Tells if node supports OOB.
621

622
  @type cfg: L{config.ConfigWriter}
623
  @param cfg: The cluster configuration
624
  @type node: L{objects.Node}
625
  @param node: The node
626
  @return: The OOB script if supported or an empty string otherwise
627

628
  """
629
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
630

    
631

    
632
def _GetWantedNodes(lu, nodes):
633
  """Returns list of checked and expanded node names.
634

635
  @type lu: L{LogicalUnit}
636
  @param lu: the logical unit on whose behalf we execute
637
  @type nodes: list
638
  @param nodes: list of node names or None for all nodes
639
  @rtype: list
640
  @return: the list of nodes, sorted
641
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
642

643
  """
644
  if nodes:
645
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
646

    
647
  return utils.NiceSort(lu.cfg.GetNodeList())
648

    
649

    
650
def _GetWantedInstances(lu, instances):
651
  """Returns list of checked and expanded instance names.
652

653
  @type lu: L{LogicalUnit}
654
  @param lu: the logical unit on whose behalf we execute
655
  @type instances: list
656
  @param instances: list of instance names or None for all instances
657
  @rtype: list
658
  @return: the list of instances, sorted
659
  @raise errors.OpPrereqError: if the instances parameter is wrong type
660
  @raise errors.OpPrereqError: if any of the passed instances is not found
661

662
  """
663
  if instances:
664
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
665
  else:
666
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
667
  return wanted
668

    
669

    
670
def _GetUpdatedParams(old_params, update_dict,
671
                      use_default=True, use_none=False):
672
  """Return the new version of a parameter dictionary.
673

674
  @type old_params: dict
675
  @param old_params: old parameters
676
  @type update_dict: dict
677
  @param update_dict: dict containing new parameter values, or
678
      constants.VALUE_DEFAULT to reset the parameter to its default
679
      value
680
  @type use_default: boolean
681
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
682
      values as 'to be deleted' values
683
  @type use_none: boolean
684
  @param use_none: whether to recognise C{None} values as 'to be
685
      deleted' values
686
  @rtype: dict
687
  @return: the new parameter dictionary
688

689
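  Example (illustrative)::

    _GetUpdatedParams({"a": 1, "b": 2},
                      {"b": constants.VALUE_DEFAULT, "c": 3})
    # -> {"a": 1, "c": 3}
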
  """
690
  params_copy = copy.deepcopy(old_params)
691
  for key, val in update_dict.iteritems():
692
    if ((use_default and val == constants.VALUE_DEFAULT) or
693
        (use_none and val is None)):
694
      try:
695
        del params_copy[key]
696
      except KeyError:
697
        pass
698
    else:
699
      params_copy[key] = val
700
  return params_copy
701

    
702

    
703
def _ReleaseLocks(lu, level, names=None, keep=None):
704
  """Releases locks owned by an LU.
705

706
  @type lu: L{LogicalUnit}
  @param lu: the logical unit releasing the locks
  @type level: member of ganeti.locking.LEVELS
707
  @param level: Lock level
708
  @type names: list or None
709
  @param names: Names of locks to release
710
  @type keep: list or None
711
  @param keep: Names of locks to retain
712

713
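  Illustrative call, where C{still_needed} stands for whatever lock names the
  LU wants to keep::

    _ReleaseLocks(lu, locking.LEVEL_NODE, keep=still_needed)
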
  """
714
  assert not (keep is not None and names is not None), \
715
         "Only one of the 'names' and the 'keep' parameters can be given"
716

    
717
  if names is not None:
718
    should_release = names.__contains__
719
  elif keep:
720
    should_release = lambda name: name not in keep
721
  else:
722
    should_release = None
723

    
724
  if should_release:
725
    retain = []
726
    release = []
727

    
728
    # Determine which locks to release
729
    for name in lu.owned_locks(level):
730
      if should_release(name):
731
        release.append(name)
732
      else:
733
        retain.append(name)
734

    
735
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
736

    
737
    # Release just some locks
738
    lu.glm.release(level, names=release)
739

    
740
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
741
  else:
742
    # Release everything
743
    lu.glm.release(level)
744

    
745
    assert not lu.glm.is_owned(level), "No locks should be owned"
746

    
747

    
748
def _MapInstanceDisksToNodes(instances):
749
  """Creates a map from (node, volume) to instance name.
750

751
  @type instances: list of L{objects.Instance}
752
  @rtype: dict; tuple of (node name, volume name) as key, instance name
      as value
753

754
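  The result looks like (illustrative values)::

    {("node1.example.com", "xenvg/disk0"): "inst1.example.com"}
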
  """
755
  return dict(((node, vol), inst.name)
756
              for inst in instances
757
              for (node, vols) in inst.MapLVsByNode().items()
758
              for vol in vols)
759

    
760

    
761
def _RunPostHook(lu, node_name):
762
  """Runs the post-hook for an opcode on a single node.
763

764
  """
765
  hm = lu.proc.BuildHooksManager(lu)
766
  try:
767
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
768
  except:
769
    # pylint: disable=W0702
770
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
771

    
772

    
773
def _CheckOutputFields(static, dynamic, selected):
774
  """Checks whether all selected fields are valid.
775

776
  @type static: L{utils.FieldSet}
777
  @param static: static fields set
778
  @type dynamic: L{utils.FieldSet}
779
  @param dynamic: dynamic fields set
780

781
  """
782
  f = utils.FieldSet()
783
  f.Extend(static)
784
  f.Extend(dynamic)
785

    
786
  delta = f.NonMatching(selected)
787
  if delta:
788
    raise errors.OpPrereqError("Unknown output fields selected: %s"
789
                               % ",".join(delta), errors.ECODE_INVAL)
790

    
791

    
792
def _CheckGlobalHvParams(params):
793
  """Validates that given hypervisor params are not global ones.
794

795
  This will ensure that instances don't get customised versions of
796
  global params.
797

798
  """
799
  used_globals = constants.HVC_GLOBALS.intersection(params)
800
  if used_globals:
801
    msg = ("The following hypervisor parameters are global and cannot"
802
           " be customized at instance level, please modify them at"
803
           " cluster level: %s" % utils.CommaJoin(used_globals))
804
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
805

    
806

    
807
def _CheckNodeOnline(lu, node, msg=None):
808
  """Ensure that a given node is online.
809

810
  @param lu: the LU on behalf of which we make the check
811
  @param node: the node to check
812
  @param msg: if passed, should be a message to replace the default one
813
  @raise errors.OpPrereqError: if the node is offline
814

815
  """
816
  if msg is None:
817
    msg = "Can't use offline node"
818
  if lu.cfg.GetNodeInfo(node).offline:
819
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
820

    
821

    
822
def _CheckNodeNotDrained(lu, node):
823
  """Ensure that a given node is not drained.
824

825
  @param lu: the LU on behalf of which we make the check
826
  @param node: the node to check
827
  @raise errors.OpPrereqError: if the node is drained
828

829
  """
830
  if lu.cfg.GetNodeInfo(node).drained:
831
    raise errors.OpPrereqError("Can't use drained node %s" % node,
832
                               errors.ECODE_STATE)
833

    
834

    
835
def _CheckNodeVmCapable(lu, node):
836
  """Ensure that a given node is vm capable.
837

838
  @param lu: the LU on behalf of which we make the check
839
  @param node: the node to check
840
  @raise errors.OpPrereqError: if the node is not vm capable
841

842
  """
843
  if not lu.cfg.GetNodeInfo(node).vm_capable:
844
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
845
                               errors.ECODE_STATE)
846

    
847

    
848
def _CheckNodeHasOS(lu, node, os_name, force_variant):
849
  """Ensure that a node supports a given OS.
850

851
  @param lu: the LU on behalf of which we make the check
852
  @param node: the node to check
853
  @param os_name: the OS to query about
854
  @param force_variant: whether to ignore variant errors
855
  @raise errors.OpPrereqError: if the node is not supporting the OS
856

857
  """
858
  result = lu.rpc.call_os_get(node, os_name)
859
  result.Raise("OS '%s' not in supported OS list for node %s" %
860
               (os_name, node),
861
               prereq=True, ecode=errors.ECODE_INVAL)
862
  if not force_variant:
863
    _CheckOSVariant(result.payload, os_name)
864

    
865

    
866
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
867
  """Ensure that a node has the given secondary ip.
868

869
  @type lu: L{LogicalUnit}
870
  @param lu: the LU on behalf of which we make the check
871
  @type node: string
872
  @param node: the node to check
873
  @type secondary_ip: string
874
  @param secondary_ip: the ip to check
875
  @type prereq: boolean
876
  @param prereq: whether to throw a prerequisite or an execute error
877
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
878
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
879

880
  """
881
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
882
  result.Raise("Failure checking secondary ip on node %s" % node,
883
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
884
  if not result.payload:
885
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
886
           " please fix and re-run this command" % secondary_ip)
887
    if prereq:
888
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
889
    else:
890
      raise errors.OpExecError(msg)
891

    
892

    
893
def _GetClusterDomainSecret():
894
  """Reads the cluster domain secret.
895

896
  """
897
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
898
                               strict=True)
899

    
900

    
901
def _CheckInstanceDown(lu, instance, reason):
902
  """Ensure that an instance is not running."""
903
  if instance.admin_up:
904
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
905
                               (instance.name, reason), errors.ECODE_STATE)
906

    
907
  pnode = instance.primary_node
908
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
909
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
910
              prereq=True, ecode=errors.ECODE_ENVIRON)
911

    
912
  if instance.name in ins_l.payload:
913
    raise errors.OpPrereqError("Instance %s is running, %s" %
914
                               (instance.name, reason), errors.ECODE_STATE)
915

    
916

    
917
def _ExpandItemName(fn, name, kind):
918
  """Expand an item name.
919

920
  @param fn: the function to use for expansion
921
  @param name: requested item name
922
  @param kind: text description ('Node' or 'Instance')
923
  @return: the resolved (full) name
924
  @raise errors.OpPrereqError: if the item is not found
925

926
  """
927
  full_name = fn(name)
928
  if full_name is None:
929
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
930
                               errors.ECODE_NOENT)
931
  return full_name
932

    
933

    
934
def _ExpandNodeName(cfg, name):
935
  """Wrapper over L{_ExpandItemName} for nodes."""
936
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
937

    
938

    
939
def _ExpandInstanceName(cfg, name):
940
  """Wrapper over L{_ExpandItemName} for instance."""
941
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
942

    
943

    
944
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
945
                          memory, vcpus, nics, disk_template, disks,
946
                          bep, hvp, hypervisor_name, tags):
947
  """Builds instance related env variables for hooks
948

949
  This builds the hook environment from individual variables.
950

951
  @type name: string
952
  @param name: the name of the instance
953
  @type primary_node: string
954
  @param primary_node: the name of the instance's primary node
955
  @type secondary_nodes: list
956
  @param secondary_nodes: list of secondary nodes as strings
957
  @type os_type: string
958
  @param os_type: the name of the instance's OS
959
  @type status: boolean
960
  @param status: the should_run status of the instance
961
  @type memory: string
962
  @param memory: the memory size of the instance
963
  @type vcpus: string
964
  @param vcpus: the count of VCPUs the instance has
965
  @type nics: list
966
  @param nics: list of tuples (ip, mac, mode, link) representing
967
      the NICs the instance has
968
  @type disk_template: string
969
  @param disk_template: the disk template of the instance
970
  @type disks: list
971
  @param disks: the list of (size, mode) pairs
972
  @type bep: dict
973
  @param bep: the backend parameters for the instance
974
  @type hvp: dict
975
  @param hvp: the hypervisor parameters for the instance
976
  @type hypervisor_name: string
977
  @param hypervisor_name: the hypervisor for the instance
978
  @type tags: list
979
  @param tags: list of instance tags as strings
980
  @rtype: dict
981
  @return: the hook environment for this instance
982

983
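  For a single-NIC, single-disk instance the result contains, among others,
  variables such as (illustrative values)::

    INSTANCE_NAME=inst1.example.com
    INSTANCE_PRIMARY=node1.example.com
    INSTANCE_NIC_COUNT=1
    INSTANCE_NIC0_MAC=aa:00:00:00:00:01
    INSTANCE_DISK_COUNT=1
    INSTANCE_DISK0_SIZE=1024
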
  """
984
  if status:
985
    str_status = "up"
986
  else:
987
    str_status = "down"
988
  env = {
989
    "OP_TARGET": name,
990
    "INSTANCE_NAME": name,
991
    "INSTANCE_PRIMARY": primary_node,
992
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
993
    "INSTANCE_OS_TYPE": os_type,
994
    "INSTANCE_STATUS": str_status,
995
    "INSTANCE_MEMORY": memory,
996
    "INSTANCE_VCPUS": vcpus,
997
    "INSTANCE_DISK_TEMPLATE": disk_template,
998
    "INSTANCE_HYPERVISOR": hypervisor_name,
999
  }
1000

    
1001
  if nics:
1002
    nic_count = len(nics)
1003
    for idx, (ip, mac, mode, link) in enumerate(nics):
1004
      if ip is None:
1005
        ip = ""
1006
      env["INSTANCE_NIC%d_IP" % idx] = ip
1007
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1008
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1009
      env["INSTANCE_NIC%d_LINK" % idx] = link
1010
      if mode == constants.NIC_MODE_BRIDGED:
1011
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1012
  else:
1013
    nic_count = 0
1014

    
1015
  env["INSTANCE_NIC_COUNT"] = nic_count
1016

    
1017
  if disks:
1018
    disk_count = len(disks)
1019
    for idx, (size, mode) in enumerate(disks):
1020
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1021
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1022
  else:
1023
    disk_count = 0
1024

    
1025
  env["INSTANCE_DISK_COUNT"] = disk_count
1026

    
1027
  if not tags:
1028
    tags = []
1029

    
1030
  env["INSTANCE_TAGS"] = " ".join(tags)
1031

    
1032
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1033
    for key, value in source.items():
1034
      env["INSTANCE_%s_%s" % (kind, key)] = value
1035

    
1036
  return env
1037

    
1038

    
1039
def _NICListToTuple(lu, nics):
1040
  """Build a list of nic information tuples.
1041

1042
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1043
  value in LUInstanceQueryData.
1044

1045
  @type lu:  L{LogicalUnit}
1046
  @param lu: the logical unit on whose behalf we execute
1047
  @type nics: list of L{objects.NIC}
1048
  @param nics: list of nics to convert to hooks tuples
1049

1050
  """
1051
  hooks_nics = []
1052
  cluster = lu.cfg.GetClusterInfo()
1053
  for nic in nics:
1054
    ip = nic.ip
1055
    mac = nic.mac
1056
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1057
    mode = filled_params[constants.NIC_MODE]
1058
    link = filled_params[constants.NIC_LINK]
1059
    hooks_nics.append((ip, mac, mode, link))
1060
  return hooks_nics
1061

    
1062

    
1063
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1064
  """Builds instance related env variables for hooks from an object.
1065

1066
  @type lu: L{LogicalUnit}
1067
  @param lu: the logical unit on whose behalf we execute
1068
  @type instance: L{objects.Instance}
1069
  @param instance: the instance for which we should build the
1070
      environment
1071
  @type override: dict
1072
  @param override: dictionary with key/values that will override
1073
      our values
1074
  @rtype: dict
1075
  @return: the hook environment dictionary
1076

1077
  """
1078
  cluster = lu.cfg.GetClusterInfo()
1079
  bep = cluster.FillBE(instance)
1080
  hvp = cluster.FillHV(instance)
1081
  args = {
1082
    "name": instance.name,
1083
    "primary_node": instance.primary_node,
1084
    "secondary_nodes": instance.secondary_nodes,
1085
    "os_type": instance.os,
1086
    "status": instance.admin_up,
1087
    "memory": bep[constants.BE_MEMORY],
1088
    "vcpus": bep[constants.BE_VCPUS],
1089
    "nics": _NICListToTuple(lu, instance.nics),
1090
    "disk_template": instance.disk_template,
1091
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1092
    "bep": bep,
1093
    "hvp": hvp,
1094
    "hypervisor_name": instance.hypervisor,
1095
    "tags": instance.tags,
1096
  }
1097
  if override:
1098
    args.update(override)
1099
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1100

    
1101

    
1102
def _AdjustCandidatePool(lu, exceptions):
1103
  """Adjust the candidate pool after node operations.
1104

1105
  """
1106
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1107
  if mod_list:
1108
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1109
               utils.CommaJoin(node.name for node in mod_list))
1110
    for name in mod_list:
1111
      lu.context.ReaddNode(name)
1112
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1113
  if mc_now > mc_max:
1114
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1115
               (mc_now, mc_max))
1116

    
1117

    
1118
def _DecideSelfPromotion(lu, exceptions=None):
1119
  """Decide whether I should promote myself as a master candidate.
1120

1121
  """
1122
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1123
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1124
  # the new node will increase mc_max by one, so:
1125
  mc_should = min(mc_should + 1, cp_size)
1126
  return mc_now < mc_should
1127

    
1128

    
1129
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1130
  """Check that the brigdes needed by a list of nics exist.
1131

1132
  """
1133
  cluster = lu.cfg.GetClusterInfo()
1134
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1135
  brlist = [params[constants.NIC_LINK] for params in paramslist
1136
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1137
  if brlist:
1138
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1139
    result.Raise("Error checking bridges on destination node '%s'" %
1140
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1141

    
1142

    
1143
def _CheckInstanceBridgesExist(lu, instance, node=None):
1144
  """Check that the brigdes needed by an instance exist.
1145

1146
  """
1147
  if node is None:
1148
    node = instance.primary_node
1149
  _CheckNicsBridgesExist(lu, instance.nics, node)
1150

    
1151

    
1152
def _CheckOSVariant(os_obj, name):
1153
  """Check whether an OS name conforms to the os variants specification.
1154

1155
  @type os_obj: L{objects.OS}
1156
  @param os_obj: OS object to check
1157
  @type name: string
1158
  @param name: OS name passed by the user, to check for validity
1159

1160
  """
1161
  variant = objects.OS.GetVariant(name)
1162
  if not os_obj.supported_variants:
1163
    if variant:
1164
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1165
                                 " passed)" % (os_obj.name, variant),
1166
                                 errors.ECODE_INVAL)
1167
    return
1168
  if not variant:
1169
    raise errors.OpPrereqError("OS name must include a variant",
1170
                               errors.ECODE_INVAL)
1171

    
1172
  if variant not in os_obj.supported_variants:
1173
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1174

    
1175

    
1176
def _GetNodeInstancesInner(cfg, fn):
1177
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1178

    
1179

    
1180
def _GetNodeInstances(cfg, node_name):
1181
  """Returns a list of all primary and secondary instances on a node.
1182

1183
  """
1184

    
1185
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1186

    
1187

    
1188
def _GetNodePrimaryInstances(cfg, node_name):
1189
  """Returns primary instances on a node.
1190

1191
  """
1192
  return _GetNodeInstancesInner(cfg,
1193
                                lambda inst: node_name == inst.primary_node)
1194

    
1195

    
1196
def _GetNodeSecondaryInstances(cfg, node_name):
1197
  """Returns secondary instances on a node.
1198

1199
  """
1200
  return _GetNodeInstancesInner(cfg,
1201
                                lambda inst: node_name in inst.secondary_nodes)
1202

    
1203

    
1204
def _GetStorageTypeArgs(cfg, storage_type):
1205
  """Returns the arguments for a storage type.
1206

1207
  """
1208
  # Special case for file storage
1209
  if storage_type == constants.ST_FILE:
1210
    # storage.FileStorage wants a list of storage directories
1211
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1212

    
1213
  return []
1214

    
1215

    
1216
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1217
  faulty = []
1218

    
1219
  for dev in instance.disks:
1220
    cfg.SetDiskID(dev, node_name)
1221

    
1222
  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1223
  result.Raise("Failed to get disk status from node %s" % node_name,
1224
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1225

    
1226
  for idx, bdev_status in enumerate(result.payload):
1227
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1228
      faulty.append(idx)
1229

    
1230
  return faulty
1231

    
1232

    
1233
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1234
  """Check the sanity of iallocator and node arguments and use the
1235
  cluster-wide iallocator if appropriate.
1236

1237
  Check that at most one of (iallocator, node) is specified. If none is
1238
  specified, then the LU's opcode's iallocator slot is filled with the
1239
  cluster-wide default iallocator.
1240

1241
  @type iallocator_slot: string
1242
  @param iallocator_slot: the name of the opcode iallocator slot
1243
  @type node_slot: string
1244
  @param node_slot: the name of the opcode target node slot
1245

1246
  """
1247
  node = getattr(lu.op, node_slot, None)
1248
  iallocator = getattr(lu.op, iallocator_slot, None)
1249

    
1250
  if node is not None and iallocator is not None:
1251
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1252
                               errors.ECODE_INVAL)
1253
  elif node is None and iallocator is None:
1254
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1255
    if default_iallocator:
1256
      setattr(lu.op, iallocator_slot, default_iallocator)
1257
    else:
1258
      raise errors.OpPrereqError("No iallocator or node given and no"
1259
                                 " cluster-wide default iallocator found;"
1260
                                 " please specify either an iallocator or a"
1261
                                 " node, or set a cluster-wide default"
1262
                                 " iallocator")
1263

    
1264

    
1265
def _GetDefaultIAllocator(cfg, iallocator):
1266
  """Decides on which iallocator to use.
1267

1268
  @type cfg: L{config.ConfigWriter}
1269
  @param cfg: Cluster configuration object
1270
  @type iallocator: string or None
1271
  @param iallocator: Iallocator specified in opcode
1272
  @rtype: string
1273
  @return: Iallocator name
1274

1275
  """
1276
  if not iallocator:
1277
    # Use default iallocator
1278
    iallocator = cfg.GetDefaultIAllocator()
1279

    
1280
  if not iallocator:
1281
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1282
                               " opcode nor as a cluster-wide default",
1283
                               errors.ECODE_INVAL)
1284

    
1285
  return iallocator
1286

    
1287

    
1288
class LUClusterPostInit(LogicalUnit):
1289
  """Logical unit for running hooks after cluster initialization.
1290

1291
  """
1292
  HPATH = "cluster-init"
1293
  HTYPE = constants.HTYPE_CLUSTER
1294

    
1295
  def BuildHooksEnv(self):
1296
    """Build hooks env.
1297

1298
    """
1299
    return {
1300
      "OP_TARGET": self.cfg.GetClusterName(),
1301
      }
1302

    
1303
  def BuildHooksNodes(self):
1304
    """Build hooks nodes.
1305

1306
    """
1307
    return ([], [self.cfg.GetMasterNode()])
1308

    
1309
  def Exec(self, feedback_fn):
1310
    """Nothing to do.
1311

1312
    """
1313
    return True
1314

    
1315

    
1316
class LUClusterDestroy(LogicalUnit):
1317
  """Logical unit for destroying the cluster.
1318

1319
  """
1320
  HPATH = "cluster-destroy"
1321
  HTYPE = constants.HTYPE_CLUSTER
1322

    
1323
  def BuildHooksEnv(self):
1324
    """Build hooks env.
1325

1326
    """
1327
    return {
1328
      "OP_TARGET": self.cfg.GetClusterName(),
1329
      }
1330

    
1331
  def BuildHooksNodes(self):
1332
    """Build hooks nodes.
1333

1334
    """
1335
    return ([], [])
1336

    
1337
  def CheckPrereq(self):
1338
    """Check prerequisites.
1339

1340
    This checks whether the cluster is empty.
1341

1342
    Any errors are signaled by raising errors.OpPrereqError.
1343

1344
    """
1345
    master = self.cfg.GetMasterNode()
1346

    
1347
    nodelist = self.cfg.GetNodeList()
1348
    if len(nodelist) != 1 or nodelist[0] != master:
1349
      raise errors.OpPrereqError("There are still %d node(s) in"
1350
                                 " this cluster." % (len(nodelist) - 1),
1351
                                 errors.ECODE_INVAL)
1352
    instancelist = self.cfg.GetInstanceList()
1353
    if instancelist:
1354
      raise errors.OpPrereqError("There are still %d instance(s) in"
1355
                                 " this cluster." % len(instancelist),
1356
                                 errors.ECODE_INVAL)
1357

    
1358
  def Exec(self, feedback_fn):
1359
    """Destroys the cluster.
1360

1361
    """
1362
    master_params = self.cfg.GetMasterNetworkParameters()
1363

    
1364
    # Run post hooks on master node before it's removed
1365
    _RunPostHook(self, master_params.name)
1366

    
1367
    ems = self.cfg.GetUseExternalMipScript()
1368
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1369
                                                     master_params, ems)
1370
    result.Raise("Could not disable the master role")
1371

    
1372
    return master_params.name
1373

    
1374

    
1375
def _VerifyCertificate(filename):
1376
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1377

1378
  @type filename: string
1379
  @param filename: Path to PEM file
1380

1381
  """
1382
  try:
1383
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1384
                                           utils.ReadFile(filename))
1385
  except Exception, err: # pylint: disable=W0703
1386
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1387
            "Failed to load X509 certificate %s: %s" % (filename, err))
1388

    
1389
  (errcode, msg) = \
1390
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1391
                                constants.SSL_CERT_EXPIRATION_ERROR)
1392

    
1393
  if msg:
1394
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1395
  else:
1396
    fnamemsg = None
1397

    
1398
  if errcode is None:
1399
    return (None, fnamemsg)
1400
  elif errcode == utils.CERT_WARNING:
1401
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1402
  elif errcode == utils.CERT_ERROR:
1403
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1404

    
1405
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1406

    
1407

    
1408
def _GetAllHypervisorParameters(cluster, instances):
1409
  """Compute the set of all hypervisor parameters.
1410

1411
  @type cluster: L{objects.Cluster}
1412
  @param cluster: the cluster object
1413
  @param instances: list of L{objects.Instance}
1414
  @param instances: additional instances from which to obtain parameters
1415
  @rtype: list of (origin, hypervisor, parameters)
1416
  @return: a list with all parameters found, indicating the hypervisor they
1417
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1418

1419
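  A single element of the result could look like (illustrative values)::

    ("os debootstrap", "xen-pvm", {"kernel_path": "/boot/vmlinuz-3-xenU"})
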
  """
1420
  hvp_data = []
1421

    
1422
  for hv_name in cluster.enabled_hypervisors:
1423
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1424

    
1425
  for os_name, os_hvp in cluster.os_hvp.items():
1426
    for hv_name, hv_params in os_hvp.items():
1427
      if hv_params:
1428
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1429
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1430

    
1431
  # TODO: collapse identical parameter values in a single one
1432
  for instance in instances:
1433
    if instance.hvparams:
1434
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1435
                       cluster.FillHV(instance)))
1436

    
1437
  return hvp_data
1438

    
1439

    
1440
class _VerifyErrors(object):
1441
  """Mix-in for cluster/group verify LUs.
1442

1443
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1444
  self.op and self._feedback_fn to be available.)
1445

1446
  """
1447

    
1448
  ETYPE_FIELD = "code"
1449
  ETYPE_ERROR = "ERROR"
1450
  ETYPE_WARNING = "WARNING"
1451

    
1452
  def _Error(self, ecode, item, msg, *args, **kwargs):
1453
    """Format an error message.
1454

1455
    Based on the opcode's error_codes parameter, either format a
1456
    parseable error code, or a simpler error string.
1457

1458
    This must be called only from Exec and functions called from Exec.
1459

1460
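    With C{self.op.error_codes} set the message is emitted in the
    machine-parseable form (illustrative)::

      ERROR:ESOMECODE:instance:inst1.example.com:the actual message

    and in the human-readable form otherwise::

      ERROR: instance inst1.example.com: the actual message
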
    """
1461
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1462
    itype, etxt, _ = ecode
1463
    # first complete the msg
1464
    if args:
1465
      msg = msg % args
1466
    # then format the whole message
1467
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1468
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1469
    else:
1470
      if item:
1471
        item = " " + item
1472
      else:
1473
        item = ""
1474
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1475
    # and finally report it via the feedback_fn
1476
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1477

    
1478
  def _ErrorIf(self, cond, ecode, *args, **kwargs):
1479
    """Log an error message if the passed condition is True.
1480

1481
    """
1482
    cond = (bool(cond)
1483
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1484

    
1485
    # If the error code is in the list of ignored errors, demote the error to a
1486
    # warning
1487
    (_, etxt, _) = ecode
1488
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1489
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1490

    
1491
    if cond:
1492
      self._Error(ecode, *args, **kwargs)
1493

    
1494
    # do not mark the operation as failed if we only get warnings
1495
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1496
      self.bad = self.bad or cond
1497

    
1498

    
1499
class LUClusterVerify(NoHooksLU):
1500
  """Submits all jobs necessary to verify the cluster.
1501

1502
  """
1503
  REQ_BGL = False
1504

    
1505
  def ExpandNames(self):
1506
    self.needed_locks = {}
1507

    
1508
  def Exec(self, feedback_fn):
1509
    jobs = []
1510

    
1511
    if self.op.group_name:
1512
      groups = [self.op.group_name]
1513
      depends_fn = lambda: None
1514
    else:
1515
      groups = self.cfg.GetNodeGroupList()
1516

    
1517
      # Verify global configuration
1518
      jobs.append([
1519
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1520
        ])
1521

    
1522
      # Always depend on global verification
1523
      depends_fn = lambda: [(-len(jobs), [])]
1524

    
1525
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1526
                                            ignore_errors=self.op.ignore_errors,
1527
                                            depends=depends_fn())]
1528
                for group in groups)
1529

    
1530
    # Fix up all parameters
1531
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1532
      op.debug_simulate_errors = self.op.debug_simulate_errors
1533
      op.verbose = self.op.verbose
1534
      op.error_codes = self.op.error_codes
1535
      try:
1536
        op.skip_checks = self.op.skip_checks
1537
      except AttributeError:
1538
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1539

    
1540
    return ResultWithJobs(jobs)
1541

    
1542

    
1543
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1544
  """Verifies the cluster config.
1545

1546
  """
1547
  REQ_BGL = True
1548

    
1549
  def _VerifyHVP(self, hvp_data):
1550
    """Verifies locally the syntax of the hypervisor parameters.
1551

1552
    """
1553
    for item, hv_name, hv_params in hvp_data:
1554
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1555
             (hv_name, item))
1556
      try:
1557
        hv_class = hypervisor.GetHypervisor(hv_name)
1558
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1559
        hv_class.CheckParameterSyntax(hv_params)
1560
      except errors.GenericError, err:
1561
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1562

    
1563
  def ExpandNames(self):
1564
    # Information can be safely retrieved as the BGL is acquired in exclusive
1565
    # mode
1566
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1567
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1568
    self.all_node_info = self.cfg.GetAllNodesInfo()
1569
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1570
    self.needed_locks = {}
1571

    
1572
  def Exec(self, feedback_fn):
1573
    """Verify integrity of cluster, performing various test on nodes.
1574

1575
    """
1576
    self.bad = False
1577
    self._feedback_fn = feedback_fn
1578

    
1579
    feedback_fn("* Verifying cluster config")
1580

    
1581
    for msg in self.cfg.VerifyConfig():
1582
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1583

    
1584
    feedback_fn("* Verifying cluster certificate files")
1585

    
1586
    for cert_filename in constants.ALL_CERT_FILES:
1587
      (errcode, msg) = _VerifyCertificate(cert_filename)
1588
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1589

    
1590
    feedback_fn("* Verifying hypervisor parameters")
1591

    
1592
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1593
                                                self.all_inst_info.values()))
1594

    
1595
    feedback_fn("* Verifying all nodes belong to an existing group")
1596

    
1597
    # We do this verification here because, should this bogus circumstance
1598
    # occur, it would never be caught by VerifyGroup, which only acts on
1599
    # nodes/instances reachable from existing node groups.
1600

    
1601
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1602
                         if node.group not in self.all_group_info)
1603

    
1604
    dangling_instances = {}
1605
    no_node_instances = []
1606

    
1607
    for inst in self.all_inst_info.values():
1608
      if inst.primary_node in dangling_nodes:
1609
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1610
      elif inst.primary_node not in self.all_node_info:
1611
        no_node_instances.append(inst.name)
1612

    
1613
    pretty_dangling = [
1614
        "%s (%s)" %
1615
        (node.name,
1616
         utils.CommaJoin(dangling_instances.get(node.name,
1617
                                                ["no instances"])))
1618
        for node in dangling_nodes]
1619

    
1620
    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1621
                  None,
1622
                  "the following nodes (and their instances) belong to a non"
1623
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1624

    
1625
    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1626
                  None,
1627
                  "the following instances have a non-existing primary-node:"
1628
                  " %s", utils.CommaJoin(no_node_instances))
1629

    
1630
    return not self.bad
1631

    
1632

    
1633
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1634
  """Verifies the status of a node group.
1635

1636
  """
1637
  HPATH = "cluster-verify"
1638
  HTYPE = constants.HTYPE_CLUSTER
1639
  REQ_BGL = False
1640

    
1641
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1642

    
1643
  class NodeImage(object):
1644
    """A class representing the logical and physical status of a node.
1645

1646
    @type name: string
1647
    @ivar name: the node name to which this object refers
1648
    @ivar volumes: a structure as returned from
1649
        L{ganeti.backend.GetVolumeList} (runtime)
1650
    @ivar instances: a list of running instances (runtime)
1651
    @ivar pinst: list of configured primary instances (config)
1652
    @ivar sinst: list of configured secondary instances (config)
1653
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1654
        instances for which this node is secondary (config)
1655
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1656
    @ivar dfree: free disk, as reported by the node (runtime)
1657
    @ivar offline: the offline status (config)
1658
    @type rpc_fail: boolean
1659
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1660
        not whether the individual keys were correct) (runtime)
1661
    @type lvm_fail: boolean
1662
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1663
    @type hyp_fail: boolean
1664
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1665
    @type ghost: boolean
1666
    @ivar ghost: whether this node is unknown to the configuration (config)
1667
    @type os_fail: boolean
1668
    @ivar os_fail: whether the RPC call didn't return valid OS data
1669
    @type oslist: dict
    @ivar oslist: the OSes diagnosed by DiagnoseOS, keyed by OS name
1671
    @type vm_capable: boolean
1672
    @ivar vm_capable: whether the node can host instances
1673

1674
    """
1675
    def __init__(self, offline=False, name=None, vm_capable=True):
1676
      self.name = name
1677
      self.volumes = {}
1678
      self.instances = []
1679
      self.pinst = []
1680
      self.sinst = []
1681
      self.sbp = {}
1682
      self.mfree = 0
1683
      self.dfree = 0
1684
      self.offline = offline
1685
      self.vm_capable = vm_capable
1686
      self.rpc_fail = False
1687
      self.lvm_fail = False
1688
      self.hyp_fail = False
1689
      self.ghost = False
1690
      self.os_fail = False
1691
      self.oslist = {}
1692

    
1693
  def ExpandNames(self):
1694
    # This raises errors.OpPrereqError on its own:
1695
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1696

    
1697
    # Get instances in node group; this is unsafe and needs verification later
1698
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1699

    
1700
    self.needed_locks = {
1701
      locking.LEVEL_INSTANCE: inst_names,
1702
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1703
      locking.LEVEL_NODE: [],
1704
      }
1705

    
1706
    self.share_locks = _ShareAll()
1707

    
1708
  def DeclareLocks(self, level):
1709
    if level == locking.LEVEL_NODE:
1710
      # Get members of node group; this is unsafe and needs verification later
1711
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1712

    
1713
      all_inst_info = self.cfg.GetAllInstancesInfo()
1714

    
1715
      # In Exec(), we warn about mirrored instances that have primary and
1716
      # secondary living in separate node groups. To fully verify that
1717
      # volumes for these instances are healthy, we will need to do an
1718
      # extra call to their secondaries. We ensure here those nodes will
1719
      # be locked.
1720
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1721
        # Important: access only the instances whose lock is owned
1722
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1723
          nodes.update(all_inst_info[inst].secondary_nodes)
1724

    
1725
      self.needed_locks[locking.LEVEL_NODE] = nodes
1726

    
1727
  def CheckPrereq(self):
1728
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1729
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1730

    
1731
    group_nodes = set(self.group_info.members)
1732
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1733

    
1734
    unlocked_nodes = \
1735
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1736

    
1737
    unlocked_instances = \
1738
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1739

    
1740
    if unlocked_nodes:
1741
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1742
                                 utils.CommaJoin(unlocked_nodes))
1743

    
1744
    if unlocked_instances:
1745
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1746
                                 utils.CommaJoin(unlocked_instances))
1747

    
1748
    self.all_node_info = self.cfg.GetAllNodesInfo()
1749
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1750

    
1751
    self.my_node_names = utils.NiceSort(group_nodes)
1752
    self.my_inst_names = utils.NiceSort(group_instances)
1753

    
1754
    self.my_node_info = dict((name, self.all_node_info[name])
1755
                             for name in self.my_node_names)
1756

    
1757
    self.my_inst_info = dict((name, self.all_inst_info[name])
1758
                             for name in self.my_inst_names)
1759

    
1760
    # We detect here the nodes that will need the extra RPC calls for verifying
1761
    # split LV volumes; they should be locked.
1762
    extra_lv_nodes = set()
1763

    
1764
    for inst in self.my_inst_info.values():
1765
      if inst.disk_template in constants.DTS_INT_MIRROR:
1766
        group = self.my_node_info[inst.primary_node].group
1767
        for nname in inst.secondary_nodes:
1768
          if self.all_node_info[nname].group != group:
1769
            extra_lv_nodes.add(nname)
1770

    
1771
    unlocked_lv_nodes = \
1772
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1773

    
1774
    if unlocked_lv_nodes:
1775
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1776
                                 utils.CommaJoin(unlocked_lv_nodes))
1777
    self.extra_lv_nodes = list(extra_lv_nodes)
1778

    
1779
  def _VerifyNode(self, ninfo, nresult):
1780
    """Perform some basic validation on data returned from a node.
1781

1782
      - check the result data structure is well formed and has all the
1783
        mandatory fields
1784
      - check ganeti version
1785

1786
    @type ninfo: L{objects.Node}
1787
    @param ninfo: the node to check
1788
    @param nresult: the results from the node
1789
    @rtype: boolean
1790
    @return: whether overall this call was successful (and we can expect
1791
         reasonable values in the response)
1792

1793
    """
1794
    node = ninfo.name
1795
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1796

    
1797
    # main result, nresult should be a non-empty dict
1798
    test = not nresult or not isinstance(nresult, dict)
1799
    _ErrorIf(test, constants.CV_ENODERPC, node,
1800
                  "unable to verify node: no data returned")
1801
    if test:
1802
      return False
1803

    
1804
    # compares ganeti version
1805
    local_version = constants.PROTOCOL_VERSION
1806
    remote_version = nresult.get("version", None)
1807
    test = not (remote_version and
1808
                isinstance(remote_version, (list, tuple)) and
1809
                len(remote_version) == 2)
1810
    _ErrorIf(test, constants.CV_ENODERPC, node,
1811
             "connection to node returned invalid data")
1812
    if test:
1813
      return False
1814

    
1815
    test = local_version != remote_version[0]
1816
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
1817
             "incompatible protocol versions: master %s,"
1818
             " node %s", local_version, remote_version[0])
1819
    if test:
1820
      return False
1821

    
1822
    # node seems compatible, we can actually try to look into its results
1823

    
1824
    # full package version
1825
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1826
                  constants.CV_ENODEVERSION, node,
1827
                  "software version mismatch: master %s, node %s",
1828
                  constants.RELEASE_VERSION, remote_version[1],
1829
                  code=self.ETYPE_WARNING)
1830

    
1831
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1832
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1833
      for hv_name, hv_result in hyp_result.iteritems():
1834
        test = hv_result is not None
1835
        _ErrorIf(test, constants.CV_ENODEHV, node,
1836
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1837

    
1838
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1839
    if ninfo.vm_capable and isinstance(hvp_result, list):
1840
      for item, hv_name, hv_result in hvp_result:
1841
        _ErrorIf(True, constants.CV_ENODEHV, node,
1842
                 "hypervisor %s parameter verify failure (source %s): %s",
1843
                 hv_name, item, hv_result)
1844

    
1845
    test = nresult.get(constants.NV_NODESETUP,
1846
                       ["Missing NODESETUP results"])
1847
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1848
             "; ".join(test))
1849

    
1850
    return True
1851

    
1852
  def _VerifyNodeTime(self, ninfo, nresult,
1853
                      nvinfo_starttime, nvinfo_endtime):
1854
    """Check the node time.
1855

1856
    @type ninfo: L{objects.Node}
1857
    @param ninfo: the node to check
1858
    @param nresult: the remote results for the node
1859
    @param nvinfo_starttime: the start time of the RPC call
1860
    @param nvinfo_endtime: the end time of the RPC call
1861

1862
    """
1863
    node = ninfo.name
1864
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1865

    
1866
    ntime = nresult.get(constants.NV_TIME, None)
1867
    try:
1868
      ntime_merged = utils.MergeTime(ntime)
1869
    except (ValueError, TypeError):
1870
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1871
      return
1872

    
1873
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1874
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1875
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1876
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1877
    else:
1878
      ntime_diff = None
1879

    
1880
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1881
             "Node time diverges by at least %s from master node time",
1882
             ntime_diff)
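    # Worked example (added, illustrative): with a hypothetical allowed skew
    # of 150s, an RPC window of [1000.0, 1002.0] and a node clock reporting
    # 1200.0, the merged time exceeds 1002.0 + 150 and the error above
    # reports a divergence of at least "198.0s" (1200.0 - 1002.0).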
1883

    
1884
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1885
    """Check the node LVM results.
1886

1887
    @type ninfo: L{objects.Node}
1888
    @param ninfo: the node to check
1889
    @param nresult: the remote results for the node
1890
    @param vg_name: the configured VG name
1891

1892
    """
1893
    if vg_name is None:
1894
      return
1895

    
1896
    node = ninfo.name
1897
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1898

    
1899
    # checks vg existence and size > 20G
1900
    vglist = nresult.get(constants.NV_VGLIST, None)
1901
    test = not vglist
1902
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1903
    if not test:
1904
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1905
                                            constants.MIN_VG_SIZE)
1906
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1907

    
1908
    # check pv names
1909
    pvlist = nresult.get(constants.NV_PVLIST, None)
1910
    test = pvlist is None
1911
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1912
    if not test:
1913
      # check that ':' is not present in PV names, since it's a
1914
      # special character for lvcreate (denotes the range of PEs to
1915
      # use on the PV)
1916
      for _, pvname, owner_vg in pvlist:
1917
        test = ":" in pvname
1918
        _ErrorIf(test, constants.CV_ENODELVM, node,
1919
                 "Invalid character ':' in PV '%s' of VG '%s'",
1920
                 pvname, owner_vg)
1921

    
1922
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1923
    """Check the node bridges.
1924

1925
    @type ninfo: L{objects.Node}
1926
    @param ninfo: the node to check
1927
    @param nresult: the remote results for the node
1928
    @param bridges: the expected list of bridges
1929

1930
    """
1931
    if not bridges:
1932
      return
1933

    
1934
    node = ninfo.name
1935
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1936

    
1937
    missing = nresult.get(constants.NV_BRIDGES, None)
1938
    test = not isinstance(missing, list)
1939
    _ErrorIf(test, constants.CV_ENODENET, node,
1940
             "did not return valid bridge information")
1941
    if not test:
1942
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1943
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1944

    
1945
  def _VerifyNodeUserScripts(self, ninfo, nresult):
1946
    """Check the results of user scripts presence and executability on the node
1947

1948
    @type ninfo: L{objects.Node}
1949
    @param ninfo: the node to check
1950
    @param nresult: the remote results for the node
1951

1952
    """
1953
    node = ninfo.name
1954

    
1955
    test = constants.NV_USERSCRIPTS not in nresult
1956
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
1957
                  "did not return user scripts information")
1958

    
1959
    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
1960
    if not test:
1961
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
1962
                    "user scripts not present or not executable: %s" %
1963
                    utils.CommaJoin(sorted(broken_scripts)))
1964

    
1965
  def _VerifyNodeNetwork(self, ninfo, nresult):
1966
    """Check the node network connectivity results.
1967

1968
    @type ninfo: L{objects.Node}
1969
    @param ninfo: the node to check
1970
    @param nresult: the remote results for the node
1971

1972
    """
1973
    node = ninfo.name
1974
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1975

    
1976
    test = constants.NV_NODELIST not in nresult
1977
    _ErrorIf(test, constants.CV_ENODESSH, node,
1978
             "node hasn't returned node ssh connectivity data")
1979
    if not test:
1980
      if nresult[constants.NV_NODELIST]:
1981
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1982
          _ErrorIf(True, constants.CV_ENODESSH, node,
1983
                   "ssh communication with node '%s': %s", a_node, a_msg)
1984

    
1985
    test = constants.NV_NODENETTEST not in nresult
1986
    _ErrorIf(test, constants.CV_ENODENET, node,
1987
             "node hasn't returned node tcp connectivity data")
1988
    if not test:
1989
      if nresult[constants.NV_NODENETTEST]:
1990
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1991
        for anode in nlist:
1992
          _ErrorIf(True, constants.CV_ENODENET, node,
1993
                   "tcp communication with node '%s': %s",
1994
                   anode, nresult[constants.NV_NODENETTEST][anode])
1995

    
1996
    test = constants.NV_MASTERIP not in nresult
1997
    _ErrorIf(test, constants.CV_ENODENET, node,
1998
             "node hasn't returned node master IP reachability data")
1999
    if not test:
2000
      if not nresult[constants.NV_MASTERIP]:
2001
        if node == self.master_node:
2002
          msg = "the master node cannot reach the master IP (not configured?)"
2003
        else:
2004
          msg = "cannot reach the master IP"
2005
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
2006

    
2007
  def _VerifyInstance(self, instance, instanceconfig, node_image,
2008
                      diskstatus):
2009
    """Verify an instance.
2010

2011
    This function checks to see if the required block devices are
2012
    available on the instance's node.
2013

2014
    """
2015
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2016
    node_current = instanceconfig.primary_node
2017

    
2018
    node_vol_should = {}
2019
    instanceconfig.MapLVsByNode(node_vol_should)
2020

    
2021
    for node in node_vol_should:
2022
      n_img = node_image[node]
2023
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2024
        # ignore missing volumes on offline or broken nodes
2025
        continue
2026
      for volume in node_vol_should[node]:
2027
        test = volume not in n_img.volumes
2028
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2029
                 "volume %s missing on node %s", volume, node)
2030

    
2031
    if instanceconfig.admin_up:
2032
      pri_img = node_image[node_current]
2033
      test = instance not in pri_img.instances and not pri_img.offline
2034
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2035
               "instance not running on its primary node %s",
2036
               node_current)
2037

    
2038
    diskdata = [(nname, success, status, idx)
2039
                for (nname, disks) in diskstatus.items()
2040
                for idx, (success, status) in enumerate(disks)]
2041

    
2042
    for nname, success, bdev_status, idx in diskdata:
2043
      # the 'ghost node' construction in Exec() ensures that we have a
2044
      # node here
2045
      snode = node_image[nname]
2046
      bad_snode = snode.ghost or snode.offline
2047
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2048
               constants.CV_EINSTANCEFAULTYDISK, instance,
2049
               "couldn't retrieve status for disk/%s on %s: %s",
2050
               idx, nname, bdev_status)
2051
      _ErrorIf((instanceconfig.admin_up and success and
2052
                bdev_status.ldisk_status == constants.LDS_FAULTY),
2053
               constants.CV_EINSTANCEFAULTYDISK, instance,
2054
               "disk/%s on %s is faulty", idx, nname)
2055

    
2056
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2057
    """Verify if there are any unknown volumes in the cluster.
2058

2059
    The .os, .swap and backup volumes are ignored. All other volumes are
2060
    reported as unknown.
2061

2062
    @type reserved: L{ganeti.utils.FieldSet}
2063
    @param reserved: a FieldSet of reserved volume names
2064

2065
    """
2066
    for node, n_img in node_image.items():
2067
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2068
        # skip non-healthy nodes
2069
        continue
2070
      for volume in n_img.volumes:
2071
        test = ((node not in node_vol_should or
2072
                volume not in node_vol_should[node]) and
2073
                not reserved.Matches(volume))
2074
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2075
                      "volume %s is unknown", volume)
2076

    
2077
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2078
    """Verify N+1 Memory Resilience.
2079

2080
    Check that if one single node dies we can still start all the
2081
    instances it was primary for.
2082

2083
    """
2084
    cluster_info = self.cfg.GetClusterInfo()
2085
    for node, n_img in node_image.items():
2086
      # This code checks that every node which is now listed as
2087
      # secondary has enough memory to host all instances it is
2088
      # supposed to host, should a single other node in the cluster fail.
2089
      # FIXME: not ready for failover to an arbitrary node
2090
      # FIXME: does not support file-backed instances
2091
      # WARNING: we currently take into account down instances as well
2092
      # as up ones, considering that even if they're down someone
2093
      # might want to start them even in the event of a node failure.
2094
      if n_img.offline:
2095
        # we're skipping offline nodes from the N+1 warning, since
2096
        # most likely we don't have good memory information from them;
2097
        # we already list instances living on such nodes, and that's
2098
        # enough warning
2099
        continue
2100
      for prinode, instances in n_img.sbp.items():
2101
        needed_mem = 0
2102
        for instance in instances:
2103
          bep = cluster_info.FillBE(instance_cfg[instance])
2104
          if bep[constants.BE_AUTO_BALANCE]:
2105
            needed_mem += bep[constants.BE_MEMORY]
2106
        test = n_img.mfree < needed_mem
2107
        self._ErrorIf(test, constants.CV_ENODEN1, node,
2108
                      "not enough memory to accomodate instance failovers"
2109
                      " should node %s fail (%dMiB needed, %dMiB available)",
2110
                      prinode, needed_mem, n_img.mfree)
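        # Worked example (added, illustrative): if this node is secondary
        # for two auto-balanced instances of prinode with BE_MEMORY of 1024
        # and 2048 MiB, needed_mem is 3072 MiB and CV_ENODEN1 is reported as
        # soon as the hypervisor reports less than 3072 MiB free here.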
2111

    
2112
  @classmethod
2113
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2114
                   (files_all, files_opt, files_mc, files_vm)):
2115
    """Verifies file checksums collected from all nodes.
2116

2117
    @param errorif: Callback for reporting errors
2118
    @param nodeinfo: List of L{objects.Node} objects
2119
    @param master_node: Name of master node
2120
    @param all_nvinfo: RPC results
2121

2122
    """
2123
    # Define functions determining which nodes to consider for a file
2124
    files2nodefn = [
2125
      (files_all, None),
2126
      (files_mc, lambda node: (node.master_candidate or
2127
                               node.name == master_node)),
2128
      (files_vm, lambda node: node.vm_capable),
2129
      ]
2130

    
2131
    # Build mapping from filename to list of nodes which should have the file
2132
    nodefiles = {}
2133
    for (files, fn) in files2nodefn:
2134
      if fn is None:
2135
        filenodes = nodeinfo
2136
      else:
2137
        filenodes = filter(fn, nodeinfo)
2138
      nodefiles.update((filename,
2139
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2140
                       for filename in files)
2141

    
2142
    assert set(nodefiles) == (files_all | files_mc | files_vm)
2143

    
2144
    fileinfo = dict((filename, {}) for filename in nodefiles)
2145
    ignore_nodes = set()
2146

    
2147
    for node in nodeinfo:
2148
      if node.offline:
2149
        ignore_nodes.add(node.name)
2150
        continue
2151

    
2152
      nresult = all_nvinfo[node.name]
2153

    
2154
      if nresult.fail_msg or not nresult.payload:
2155
        node_files = None
2156
      else:
2157
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2158

    
2159
      test = not (node_files and isinstance(node_files, dict))
2160
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
2161
              "Node did not return file checksum data")
2162
      if test:
2163
        ignore_nodes.add(node.name)
2164
        continue
2165

    
2166
      # Build per-checksum mapping from filename to nodes having it
2167
      for (filename, checksum) in node_files.items():
2168
        assert filename in nodefiles
2169
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2170

    
2171
    for (filename, checksums) in fileinfo.items():
2172
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2173

    
2174
      # Nodes having the file
2175
      with_file = frozenset(node_name
2176
                            for nodes in fileinfo[filename].values()
2177
                            for node_name in nodes) - ignore_nodes
2178

    
2179
      expected_nodes = nodefiles[filename] - ignore_nodes
2180

    
2181
      # Nodes missing file
2182
      missing_file = expected_nodes - with_file
2183

    
2184
      if filename in files_opt:
2185
        # All or no nodes
2186
        errorif(missing_file and missing_file != expected_nodes,
2187
                constants.CV_ECLUSTERFILECHECK, None,
2188
                "File %s is optional, but it must exist on all or no"
2189
                " nodes (not found on %s)",
2190
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2191
      else:
2192
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2193
                "File %s is missing from node(s) %s", filename,
2194
                utils.CommaJoin(utils.NiceSort(missing_file)))
2195

    
2196
        # Warn if a node has a file it shouldn't
2197
        unexpected = with_file - expected_nodes
2198
        errorif(unexpected,
2199
                constants.CV_ECLUSTERFILECHECK, None,
2200
                "File %s should not exist on node(s) %s",
2201
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2202

    
2203
      # See if there are multiple versions of the file
2204
      test = len(checksums) > 1
2205
      if test:
2206
        variants = ["variant %s on %s" %
2207
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2208
                    for (idx, (checksum, nodes)) in
2209
                      enumerate(sorted(checksums.items()))]
2210
      else:
2211
        variants = []
2212

    
2213
      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2214
              "File %s found with %s different checksums (%s)",
2215
              filename, len(checksums), "; ".join(variants))
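    # Note (added, illustrative): the fileinfo mapping iterated above goes
    # from filename to {checksum: set(node names)}, e.g. (hypothetical
    # values)
    #   {"/etc/example.conf": {"0123...": set(["node1"]),
    #                          "4567...": set(["node2"])}}
    # where more than one checksum key for a file is exactly the
    # "different checksums" case reported just above.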
2216

    
2217
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2218
                      drbd_map):
2219
    """Verifies and the node DRBD status.
2220

2221
    @type ninfo: L{objects.Node}
2222
    @param ninfo: the node to check
2223
    @param nresult: the remote results for the node
2224
    @param instanceinfo: the dict of instances
2225
    @param drbd_helper: the configured DRBD usermode helper
2226
    @param drbd_map: the DRBD map as returned by
2227
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2228

2229
    """
2230
    node = ninfo.name
2231
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2232

    
2233
    if drbd_helper:
2234
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2235
      test = (helper_result is None)
2236
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2237
               "no drbd usermode helper returned")
2238
      if helper_result:
2239
        status, payload = helper_result
2240
        test = not status
2241
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2242
                 "drbd usermode helper check unsuccessful: %s", payload)
2243
        test = status and (payload != drbd_helper)
2244
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2245
                 "wrong drbd usermode helper: %s", payload)
2246

    
2247
    # compute the DRBD minors
2248
    node_drbd = {}
2249
    for minor, instance in drbd_map[node].items():
2250
      test = instance not in instanceinfo
2251
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2252
               "ghost instance '%s' in temporary DRBD map", instance)
2253
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
2256
      if test:
2257
        node_drbd[minor] = (instance, False)
2258
      else:
2259
        instance = instanceinfo[instance]
2260
        node_drbd[minor] = (instance.name, instance.admin_up)
2261

    
2262
    # and now check them
2263
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2264
    test = not isinstance(used_minors, (tuple, list))
2265
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
2266
             "cannot parse drbd status file: %s", str(used_minors))
2267
    if test:
2268
      # we cannot check drbd status
2269
      return
2270

    
2271
    for minor, (iname, must_exist) in node_drbd.items():
2272
      test = minor not in used_minors and must_exist
2273
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2274
               "drbd minor %d of instance %s is not active", minor, iname)
2275
    for minor in used_minors:
2276
      test = minor not in node_drbd
2277
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2278
               "unallocated drbd minor %d is in use", minor)
2279

    
2280
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2281
    """Builds the node OS structures.
2282

2283
    @type ninfo: L{objects.Node}
2284
    @param ninfo: the node to check
2285
    @param nresult: the remote results for the node
2286
    @param nimg: the node image object
2287

2288
    """
2289
    node = ninfo.name
2290
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2291

    
2292
    remote_os = nresult.get(constants.NV_OSLIST, None)
2293
    test = (not isinstance(remote_os, list) or
2294
            not compat.all(isinstance(v, list) and len(v) == 7
2295
                           for v in remote_os))
2296

    
2297
    _ErrorIf(test, constants.CV_ENODEOS, node,
2298
             "node hasn't returned valid OS data")
2299

    
2300
    nimg.os_fail = test
2301

    
2302
    if test:
2303
      return
2304

    
2305
    os_dict = {}
2306

    
2307
    for (name, os_path, status, diagnose,
2308
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2309

    
2310
      if name not in os_dict:
2311
        os_dict[name] = []
2312

    
2313
      # parameters is a list of lists instead of list of tuples due to
2314
      # JSON lacking a real tuple type, fix it:
2315
      parameters = [tuple(v) for v in parameters]
2316
      os_dict[name].append((os_path, status, diagnose,
2317
                            set(variants), set(parameters), set(api_ver)))
2318

    
2319
    nimg.oslist = os_dict
2320

    
2321
  def _VerifyNodeOS(self, ninfo, nimg, base):
2322
    """Verifies the node OS list.
2323

2324
    @type ninfo: L{objects.Node}
2325
    @param ninfo: the node to check
2326
    @param nimg: the node image object
2327
    @param base: the 'template' node we match against (e.g. from the master)
2328

2329
    """
2330
    node = ninfo.name
2331
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2332

    
2333
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2334

    
2335
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2336
    for os_name, os_data in nimg.oslist.items():
2337
      assert os_data, "Empty OS status for OS %s?!" % os_name
2338
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2339
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2340
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2341
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2342
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2343
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2344
      # comparisons with the 'base' image
2345
      test = os_name not in base.oslist
2346
      _ErrorIf(test, constants.CV_ENODEOS, node,
2347
               "Extra OS %s not present on reference node (%s)",
2348
               os_name, base.name)
2349
      if test:
2350
        continue
2351
      assert base.oslist[os_name], "Base node has empty OS status?"
2352
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2353
      if not b_status:
2354
        # base OS is invalid, skipping
2355
        continue
2356
      for kind, a, b in [("API version", f_api, b_api),
2357
                         ("variants list", f_var, b_var),
2358
                         ("parameters", beautify_params(f_param),
2359
                          beautify_params(b_param))]:
2360
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
2361
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2362
                 kind, os_name, base.name,
2363
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2364

    
2365
    # check any missing OSes
2366
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2367
    _ErrorIf(missing, constants.CV_ENODEOS, node,
2368
             "OSes present on reference node %s but missing on this node: %s",
2369
             base.name, utils.CommaJoin(missing))
2370

    
2371
  def _VerifyOob(self, ninfo, nresult):
2372
    """Verifies out of band functionality of a node.
2373

2374
    @type ninfo: L{objects.Node}
2375
    @param ninfo: the node to check
2376
    @param nresult: the remote results for the node
2377

2378
    """
2379
    node = ninfo.name
2380
    # We just have to verify the paths on master and/or master candidates
2381
    # as the oob helper is invoked on the master
2382
    if ((ninfo.master_candidate or ninfo.master_capable) and
2383
        constants.NV_OOB_PATHS in nresult):
2384
      for path_result in nresult[constants.NV_OOB_PATHS]:
2385
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2386

    
2387
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2388
    """Verifies and updates the node volume data.
2389

2390
    This function will update a L{NodeImage}'s internal structures
2391
    with data from the remote call.
2392

2393
    @type ninfo: L{objects.Node}
2394
    @param ninfo: the node to check
2395
    @param nresult: the remote results for the node
2396
    @param nimg: the node image object
2397
    @param vg_name: the configured VG name
2398

2399
    """
2400
    node = ninfo.name
2401
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2402

    
2403
    nimg.lvm_fail = True
2404
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2405
    if vg_name is None:
2406
      pass
2407
    elif isinstance(lvdata, basestring):
2408
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2409
               utils.SafeEncode(lvdata))
2410
    elif not isinstance(lvdata, dict):
2411
      _ErrorIf(True, constants.CV_ENODELVM, node,
2412
               "rpc call to node failed (lvlist)")
2413
    else:
2414
      nimg.volumes = lvdata
2415
      nimg.lvm_fail = False
2416

    
2417
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2418
    """Verifies and updates the node instance list.
2419

2420
    If the listing was successful, then updates this node's instance
2421
    list. Otherwise, it marks the RPC call as failed for the instance
2422
    list key.
2423

2424
    @type ninfo: L{objects.Node}
2425
    @param ninfo: the node to check
2426
    @param nresult: the remote results for the node
2427
    @param nimg: the node image object
2428

2429
    """
2430
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2431
    test = not isinstance(idata, list)
2432
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2433
                  "rpc call to node failed (instancelist): %s",
2434
                  utils.SafeEncode(str(idata)))
2435
    if test:
2436
      nimg.hyp_fail = True
2437
    else:
2438
      nimg.instances = idata
2439

    
2440
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2441
    """Verifies and computes a node information map
2442

2443
    @type ninfo: L{objects.Node}
2444
    @param ninfo: the node to check
2445
    @param nresult: the remote results for the node
2446
    @param nimg: the node image object
2447
    @param vg_name: the configured VG name
2448

2449
    """
2450
    node = ninfo.name
2451
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2452

    
2453
    # try to read free memory (from the hypervisor)
2454
    hv_info = nresult.get(constants.NV_HVINFO, None)
2455
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2456
    _ErrorIf(test, constants.CV_ENODEHV, node,
2457
             "rpc call to node failed (hvinfo)")
2458
    if not test:
2459
      try:
2460
        nimg.mfree = int(hv_info["memory_free"])
2461
      except (ValueError, TypeError):
2462
        _ErrorIf(True, constants.CV_ENODERPC, node,
2463
                 "node returned invalid nodeinfo, check hypervisor")
2464

    
2465
    # FIXME: devise a free space model for file based instances as well
2466
    if vg_name is not None:
2467
      test = (constants.NV_VGLIST not in nresult or
2468
              vg_name not in nresult[constants.NV_VGLIST])
2469
      _ErrorIf(test, constants.CV_ENODELVM, node,
2470
               "node didn't return data for the volume group '%s'"
2471
               " - it is either missing or broken", vg_name)
2472
      if not test:
2473
        try:
2474
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2475
        except (ValueError, TypeError):
2476
          _ErrorIf(True, constants.CV_ENODERPC, node,
2477
                   "node returned invalid LVM info, check LVM status")
2478

    
2479
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2480
    """Gets per-disk status information for all instances.
2481

2482
    @type nodelist: list of strings
2483
    @param nodelist: Node names
2484
    @type node_image: dict of (name, L{objects.Node})
2485
    @param node_image: Node objects
2486
    @type instanceinfo: dict of (name, L{objects.Instance})
2487
    @param instanceinfo: Instance objects
2488
    @rtype: {instance: {node: [(success, payload)]}}
2489
    @return: a dictionary of per-instance dictionaries with nodes as
2490
        keys and disk information as values; the disk information is a
2491
        list of tuples (success, payload)
2492

2493
    """
2494
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2495

    
2496
    node_disks = {}
2497
    node_disks_devonly = {}
2498
    diskless_instances = set()
2499
    diskless = constants.DT_DISKLESS
2500

    
2501
    for nname in nodelist:
2502
      node_instances = list(itertools.chain(node_image[nname].pinst,
2503
                                            node_image[nname].sinst))
2504
      diskless_instances.update(inst for inst in node_instances
2505
                                if instanceinfo[inst].disk_template == diskless)
2506
      disks = [(inst, disk)
2507
               for inst in node_instances
2508
               for disk in instanceinfo[inst].disks]
2509

    
2510
      if not disks:
2511
        # No need to collect data
2512
        continue
2513

    
2514
      node_disks[nname] = disks
2515

    
2516
      # Creating copies as SetDiskID below will modify the objects and that can
2517
      # lead to incorrect data returned from nodes
2518
      devonly = [dev.Copy() for (_, dev) in disks]
2519

    
2520
      for dev in devonly:
2521
        self.cfg.SetDiskID(dev, nname)
2522

    
2523
      node_disks_devonly[nname] = devonly
2524

    
2525
    assert len(node_disks) == len(node_disks_devonly)
2526

    
2527
    # Collect data from all nodes with disks
2528
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2529
                                                          node_disks_devonly)
2530

    
2531
    assert len(result) == len(node_disks)
2532

    
2533
    instdisk = {}
2534

    
2535
    for (nname, nres) in result.items():
2536
      disks = node_disks[nname]
2537

    
2538
      if nres.offline:
2539
        # No data from this node
2540
        data = len(disks) * [(False, "node offline")]
2541
      else:
2542
        msg = nres.fail_msg
2543
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
2544
                 "while getting disk information: %s", msg)
2545
        if msg:
2546
          # No data from this node
2547
          data = len(disks) * [(False, msg)]
2548
        else:
2549
          data = []
2550
          for idx, i in enumerate(nres.payload):
2551
            if isinstance(i, (tuple, list)) and len(i) == 2:
2552
              data.append(i)
2553
            else:
2554
              logging.warning("Invalid result from node %s, entry %d: %s",
2555
                              nname, idx, i)
2556
              data.append((False, "Invalid result from the remote node"))
2557

    
2558
      for ((inst, _), status) in zip(disks, data):
2559
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2560

    
2561
    # Add empty entries for diskless instances.
2562
    for inst in diskless_instances:
2563
      assert inst not in instdisk
2564
      instdisk[inst] = {}
2565

    
2566
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2567
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2568
                      compat.all(isinstance(s, (tuple, list)) and
2569
                                 len(s) == 2 for s in statuses)
2570
                      for inst, nnames in instdisk.items()
2571
                      for nname, statuses in nnames.items())
2572
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2573

    
2574
    return instdisk
2575

    
2576
  @staticmethod
2577
  def _SshNodeSelector(group_uuid, all_nodes):
2578
    """Create endless iterators for all potential SSH check hosts.
2579

2580
    """
2581
    nodes = [node for node in all_nodes
2582
             if (node.group != group_uuid and
2583
                 not node.offline)]
2584
    keyfunc = operator.attrgetter("group")
2585

    
2586
    return map(itertools.cycle,
2587
               [sorted(map(operator.attrgetter("name"), names))
2588
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2589
                                                  keyfunc)])
2590

    
2591
  @classmethod
2592
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2593
    """Choose which nodes should talk to which other nodes.
2594

2595
    We will make nodes contact all nodes in their group, and one node from
2596
    every other group.
2597

2598
    @warning: This algorithm has a known issue if one node group is much
2599
      smaller than others (e.g. just one node). In such a case all other
2600
      nodes will talk to the single node.
2601

2602
    """
2603
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2604
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2605

    
2606
    return (online_nodes,
2607
            dict((name, sorted([i.next() for i in sel]))
2608
                 for name in online_nodes))
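  # Note (added, illustrative): with hypothetical groups
  #   {"uuid-a": ["nodeA1", "nodeA2"], "uuid-b": ["nodeB1"]},
  # verifying group "uuid-a" yields roughly
  #   (["nodeA1", "nodeA2"], {"nodeA1": ["nodeB1"], "nodeA2": ["nodeB1"]}),
  # i.e. every online node of the verified group additionally contacts one
  # node picked round-robin from each other group.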
2609

    
2610
  def BuildHooksEnv(self):
2611
    """Build hooks env.
2612

2613
    Cluster-Verify hooks run only in the post phase, and their failure causes
2614
    the output to be logged in the verify output and the verification to fail.
2615

2616
    """
2617
    env = {
2618
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2619
      }
2620

    
2621
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2622
               for node in self.my_node_info.values())
2623

    
2624
    return env
2625

    
2626
  def BuildHooksNodes(self):
2627
    """Build hooks nodes.
2628

2629
    """
2630
    return ([], self.my_node_names)
2631

    
2632
  def Exec(self, feedback_fn):
2633
    """Verify integrity of the node group, performing various test on nodes.
2634

2635
    """
2636
    # This method has too many local variables. pylint: disable=R0914
2637
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2638

    
2639
    if not self.my_node_names:
2640
      # empty node group
2641
      feedback_fn("* Empty node group, skipping verification")
2642
      return True
2643

    
2644
    self.bad = False
2645
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2646
    verbose = self.op.verbose
2647
    self._feedback_fn = feedback_fn
2648

    
2649
    vg_name = self.cfg.GetVGName()
2650
    drbd_helper = self.cfg.GetDRBDHelper()
2651
    cluster = self.cfg.GetClusterInfo()
2652
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2653
    hypervisors = cluster.enabled_hypervisors
2654
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2655

    
2656
    i_non_redundant = [] # Non redundant instances
2657
    i_non_a_balanced = [] # Non auto-balanced instances
2658
    n_offline = 0 # Count of offline nodes
2659
    n_drained = 0 # Count of nodes being drained
2660
    node_vol_should = {}
2661

    
2662
    # FIXME: verify OS list
2663

    
2664
    # File verification
2665
    filemap = _ComputeAncillaryFiles(cluster, False)
2666

    
2667
    # do local checksums
2668
    master_node = self.master_node = self.cfg.GetMasterNode()
2669
    master_ip = self.cfg.GetMasterIP()
2670

    
2671
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2672

    
2673
    user_scripts = []
2674
    if self.cfg.GetUseExternalMipScript():
2675
      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2676

    
2677
    node_verify_param = {
2678
      constants.NV_FILELIST:
2679
        utils.UniqueSequence(filename
2680
                             for files in filemap
2681
                             for filename in files),
2682
      constants.NV_NODELIST:
2683
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2684
                                  self.all_node_info.values()),
2685
      constants.NV_HYPERVISOR: hypervisors,
2686
      constants.NV_HVPARAMS:
2687
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2688
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2689
                                 for node in node_data_list
2690
                                 if not node.offline],
2691
      constants.NV_INSTANCELIST: hypervisors,
2692
      constants.NV_VERSION: None,
2693
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2694
      constants.NV_NODESETUP: None,
2695
      constants.NV_TIME: None,
2696
      constants.NV_MASTERIP: (master_node, master_ip),
2697
      constants.NV_OSLIST: None,
2698
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2699
      constants.NV_USERSCRIPTS: user_scripts,
2700
      }
2701

    
2702
    if vg_name is not None:
2703
      node_verify_param[constants.NV_VGLIST] = None
2704
      node_verify_param[constants.NV_LVLIST] = vg_name
2705
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2706
      node_verify_param[constants.NV_DRBDLIST] = None
2707

    
2708
    if drbd_helper:
2709
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2710

    
2711
    # bridge checks
2712
    # FIXME: this needs to be changed per node-group, not cluster-wide
2713
    bridges = set()
2714
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2715
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2716
      bridges.add(default_nicpp[constants.NIC_LINK])
2717
    for instance in self.my_inst_info.values():
2718
      for nic in instance.nics:
2719
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2720
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2721
          bridges.add(full_nic[constants.NIC_LINK])
2722

    
2723
    if bridges:
2724
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2725

    
2726
    # Build our expected cluster state
2727
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2728
                                                 name=node.name,
2729
                                                 vm_capable=node.vm_capable))
2730
                      for node in node_data_list)
2731

    
2732
    # Gather OOB paths
2733
    oob_paths = []
2734
    for node in self.all_node_info.values():
2735
      path = _SupportsOob(self.cfg, node)
2736
      if path and path not in oob_paths:
2737
        oob_paths.append(path)
2738

    
2739
    if oob_paths:
2740
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2741

    
2742
    for instance in self.my_inst_names:
2743
      inst_config = self.my_inst_info[instance]
2744

    
2745
      for nname in inst_config.all_nodes:
2746
        if nname not in node_image:
2747
          gnode = self.NodeImage(name=nname)
2748
          gnode.ghost = (nname not in self.all_node_info)
2749
          node_image[nname] = gnode
2750

    
2751
      inst_config.MapLVsByNode(node_vol_should)
2752

    
2753
      pnode = inst_config.primary_node
2754
      node_image[pnode].pinst.append(instance)
2755

    
2756
      for snode in inst_config.secondary_nodes:
2757
        nimg = node_image[snode]
2758
        nimg.sinst.append(instance)
2759
        if pnode not in nimg.sbp:
2760
          nimg.sbp[pnode] = []
2761
        nimg.sbp[pnode].append(instance)
2762

    
2763
    # At this point, we have the in-memory data structures complete,
2764
    # except for the runtime information, which we'll gather next
2765

    
2766
    # Due to the way our RPC system works, exact response times cannot be
2767
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2768
    # time before and after executing the request, we can at least have a time
2769
    # window.
2770
    nvinfo_starttime = time.time()
2771
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2772
                                           node_verify_param,
2773
                                           self.cfg.GetClusterName())
2774
    nvinfo_endtime = time.time()
2775

    
2776
    if self.extra_lv_nodes and vg_name is not None:
2777
      extra_lv_nvinfo = \
2778
          self.rpc.call_node_verify(self.extra_lv_nodes,
2779
                                    {constants.NV_LVLIST: vg_name},
2780
                                    self.cfg.GetClusterName())
2781
    else:
2782
      extra_lv_nvinfo = {}
2783

    
2784
    all_drbd_map = self.cfg.ComputeDRBDMap()
2785

    
2786
    feedback_fn("* Gathering disk information (%s nodes)" %
2787
                len(self.my_node_names))
2788
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2789
                                     self.my_inst_info)
2790

    
2791
    feedback_fn("* Verifying configuration file consistency")
2792

    
2793
    # If not all nodes are being checked, we need to make sure the master node
2794
    # and a non-checked vm_capable node are in the list.
2795
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2796
    if absent_nodes:
2797
      vf_nvinfo = all_nvinfo.copy()
2798
      vf_node_info = list(self.my_node_info.values())
2799
      additional_nodes = []
2800
      if master_node not in self.my_node_info:
2801
        additional_nodes.append(master_node)
2802
        vf_node_info.append(self.all_node_info[master_node])
2803
      # Add the first vm_capable node we find which is not included
2804
      for node in absent_nodes:
2805
        nodeinfo = self.all_node_info[node]
2806
        if nodeinfo.vm_capable and not nodeinfo.offline:
2807
          additional_nodes.append(node)
2808
          vf_node_info.append(self.all_node_info[node])
2809
          break
2810
      key = constants.NV_FILELIST
2811
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2812
                                                 {key: node_verify_param[key]},
2813
                                                 self.cfg.GetClusterName()))
2814
    else:
2815
      vf_nvinfo = all_nvinfo
2816
      vf_node_info = self.my_node_info.values()
2817

    
2818
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2819

    
2820
    feedback_fn("* Verifying node status")
2821

    
2822
    refos_img = None
2823

    
2824
    for node_i in node_data_list:
2825
      node = node_i.name
2826
      nimg = node_image[node]
2827

    
2828
      if node_i.offline:
2829
        if verbose:
2830
          feedback_fn("* Skipping offline node %s" % (node,))
2831
        n_offline += 1
2832
        continue
2833

    
2834
      if node == master_node:
2835
        ntype = "master"
2836
      elif node_i.master_candidate:
2837
        ntype = "master candidate"
2838
      elif node_i.drained:
2839
        ntype = "drained"
2840
        n_drained += 1
2841
      else:
2842
        ntype = "regular"
2843
      if verbose:
2844
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2845

    
2846
      msg = all_nvinfo[node].fail_msg
2847
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2848
               msg)
2849
      if msg:
2850
        nimg.rpc_fail = True
2851
        continue
2852

    
2853
      nresult = all_nvinfo[node].payload
2854

    
2855
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2856
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2857
      self._VerifyNodeNetwork(node_i, nresult)
2858
      self._VerifyNodeUserScripts(node_i, nresult)
2859
      self._VerifyOob(node_i, nresult)
2860

    
2861
      if nimg.vm_capable:
2862
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2863
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2864
                             all_drbd_map)
2865

    
2866
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2867
        self._UpdateNodeInstances(node_i, nresult, nimg)
2868
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2869
        self._UpdateNodeOS(node_i, nresult, nimg)
2870

    
2871
        if not nimg.os_fail:
2872
          if refos_img is None:
2873
            refos_img = nimg
2874
          self._VerifyNodeOS(node_i, nimg, refos_img)
2875
        self._VerifyNodeBridges(node_i, nresult, bridges)
2876

    
2877
        # Check whether all running instancies are primary for the node. (This
2878
        # can no longer be done from _VerifyInstance below, since some of the
2879
        # wrong instances could be from other node groups.)
2880
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2881

    
2882
        for inst in non_primary_inst:
2883
          test = inst in self.all_inst_info
2884
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2885
                   "instance should not run on node %s", node_i.name)
2886
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2887
                   "node is running unknown instance %s", inst)
2888

    
2889
    for node, result in extra_lv_nvinfo.items():
2890
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2891
                              node_image[node], vg_name)
2892

    
2893
    feedback_fn("* Verifying instance status")
2894
    for instance in self.my_inst_names:
2895
      if verbose:
2896
        feedback_fn("* Verifying instance %s" % instance)
2897
      inst_config = self.my_inst_info[instance]
2898
      self._VerifyInstance(instance, inst_config, node_image,
2899
                           instdisk[instance])
2900
      inst_nodes_offline = []
2901

    
2902
      pnode = inst_config.primary_node
2903
      pnode_img = node_image[pnode]
2904
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2905
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
2906
               " primary node failed", instance)
2907

    
2908
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2909
               constants.CV_EINSTANCEBADNODE, instance,
2910
               "instance is marked as running and lives on offline node %s",
2911
               inst_config.primary_node)
2912

    
2913
      # If the instance is non-redundant we cannot survive losing its primary
2914
      # node, so we are not N+1 compliant. On the other hand we have no disk
2915
      # templates with more than one secondary so that situation is not well
2916
      # supported either.
2917
      # FIXME: does not support file-backed instances
2918
      if not inst_config.secondary_nodes:
2919
        i_non_redundant.append(instance)
2920

    
2921
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
2922
               constants.CV_EINSTANCELAYOUT,
2923
               instance, "instance has multiple secondary nodes: %s",
2924
               utils.CommaJoin(inst_config.secondary_nodes),
2925
               code=self.ETYPE_WARNING)
2926

    
2927
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2928
        pnode = inst_config.primary_node
2929
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2930
        instance_groups = {}
2931

    
2932
        for node in instance_nodes:
2933
          instance_groups.setdefault(self.all_node_info[node].group,
2934
                                     []).append(node)
2935

    
2936
        pretty_list = [
2937
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2938
          # Sort so that we always list the primary node first.
2939
          for group, nodes in sorted(instance_groups.items(),
2940
                                     key=lambda (_, nodes): pnode in nodes,
2941
                                     reverse=True)]
2942

    
2943
        self._ErrorIf(len(instance_groups) > 1,
2944
                      constants.CV_EINSTANCESPLITGROUPS,
2945
                      instance, "instance has primary and secondary nodes in"
2946
                      " different groups: %s", utils.CommaJoin(pretty_list),
2947
                      code=self.ETYPE_WARNING)
2948

    
2949
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2950
        i_non_a_balanced.append(instance)
2951

    
2952
      for snode in inst_config.secondary_nodes:
2953
        s_img = node_image[snode]
2954
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2955
                 snode, "instance %s, connection to secondary node failed",
2956
                 instance)
2957

    
2958
        if s_img.offline:
2959
          inst_nodes_offline.append(snode)
2960

    
2961
      # warn that the instance lives on offline nodes
2962
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2963
               "instance has offline secondary node(s) %s",
2964
               utils.CommaJoin(inst_nodes_offline))
2965
      # ... or ghost/non-vm_capable nodes
2966
      for node in inst_config.all_nodes:
2967
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2968
                 instance, "instance lives on ghost node %s", node)
2969
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2970
                 instance, "instance lives on non-vm_capable node %s", node)
2971

    
2972
    feedback_fn("* Verifying orphan volumes")
2973
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2974

    
2975
    # We will get spurious "unknown volume" warnings if any node of this group
2976
    # is secondary for an instance whose primary is in another group. To avoid
2977
    # them, we find these instances and add their volumes to node_vol_should.
2978
    for inst in self.all_inst_info.values():
2979
      for secondary in inst.secondary_nodes:
2980
        if (secondary in self.my_node_info
2981
            and inst.name not in self.my_inst_info):
2982
          inst.MapLVsByNode(node_vol_should)
2983
          break
2984

    
2985
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2986

    
2987
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2988
      feedback_fn("* Verifying N+1 Memory redundancy")
2989
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2990

    
2991
    feedback_fn("* Other Notes")
2992
    if i_non_redundant:
2993
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2994
                  % len(i_non_redundant))
2995

    
2996
    if i_non_a_balanced:
2997
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2998
                  % len(i_non_a_balanced))
2999

    
3000
    if n_offline:
3001
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3002

    
3003
    if n_drained:
3004
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3005

    
3006
    return not self.bad
3007

    
3008
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3009
    """Analyze the post-hooks' result
3010

3011
    This method analyses the hook result, handles it, and sends some
3012
    nicely-formatted feedback back to the user.
3013

3014
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
3015
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3016
    @param hooks_results: the results of the multi-node hooks rpc call
3017
    @param feedback_fn: function used send feedback back to the caller
3018
    @param lu_result: previous Exec result
3019
    @return: the new Exec result, based on the previous result
3020
        and hook results
3021

3022
    """
3023
    # We only really run POST phase hooks, only for non-empty groups,
3024
    # and are only interested in their results
3025
    if not self.my_node_names:
3026
      # empty node group
3027
      pass
3028
    elif phase == constants.HOOKS_PHASE_POST:
3029
      # Used to change hooks' output to proper indentation
3030
      feedback_fn("* Hooks Results")
3031
      assert hooks_results, "invalid result from hooks"
3032

    
3033
      for node_name in hooks_results:
3034
        res = hooks_results[node_name]
3035
        msg = res.fail_msg
3036
        test = msg and not res.offline
3037
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3038
                      "Communication failure in hooks execution: %s", msg)
3039
        if res.offline or msg:
3040
          # No need to investigate payload if node is offline or gave
3041
          # an error.
3042
          continue
3043
        for script, hkr, output in res.payload:
3044
          test = hkr == constants.HKR_FAIL
3045
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3046
                        "Script %s failed, output:", script)
3047
          if test:
3048
            output = self._HOOKS_INDENT_RE.sub("      ", output)
3049
            feedback_fn("%s" % output)
3050
            lu_result = False
3051

    
3052
    return lu_result
3053

    
3054

    
3055
class LUClusterVerifyDisks(NoHooksLU):
3056
  """Verifies the cluster disks status.
3057

3058
  """
3059
  REQ_BGL = False
3060

    
3061
  def ExpandNames(self):
3062
    self.share_locks = _ShareAll()
3063
    self.needed_locks = {
3064
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
3065
      }
3066

    
3067
  def Exec(self, feedback_fn):
3068
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3069

    
3070
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3071
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3072
                           for group in group_names])
3073

    
3074

    
3075
class LUGroupVerifyDisks(NoHooksLU):
3076
  """Verifies the status of all disks in a node group.
3077

3078
  """
3079
  REQ_BGL = False
3080

    
3081
  def ExpandNames(self):
3082
    # Raises errors.OpPrereqError on its own if group can't be found
3083
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3084

    
3085
    self.share_locks = _ShareAll()
3086
    self.needed_locks = {
3087
      locking.LEVEL_INSTANCE: [],
3088
      locking.LEVEL_NODEGROUP: [],
3089
      locking.LEVEL_NODE: [],
3090
      }
3091

    
3092
  def DeclareLocks(self, level):
3093
    if level == locking.LEVEL_INSTANCE:
3094
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3095

    
3096
      # Lock instances optimistically, needs verification once node and group
3097
      # locks have been acquired
3098
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3099
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3100

    
3101
    elif level == locking.LEVEL_NODEGROUP:
3102
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3103

    
3104
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3105
        set([self.group_uuid] +
3106
            # Lock all groups used by instances optimistically; this requires
3107
            # going via the node before it's locked, requiring verification
3108
            # later on
3109
            [group_uuid
3110
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3111
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3112

    
3113
    elif level == locking.LEVEL_NODE:
3114
      # This will only lock the nodes in the group to be verified which contain
3115
      # actual instances
3116
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3117
      self._LockInstancesNodes()
3118

    
3119
      # Lock all nodes in group to be verified
3120
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3121
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3122
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3123

    
3124
  def CheckPrereq(self):
3125
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3126
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3127
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3128

    
3129
    assert self.group_uuid in owned_groups
3130

    
3131
    # Check if locked instances are still correct
3132
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3133

    
3134
    # Get instance information
3135
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3136

    
3137
    # Check if node groups for locked instances are still correct
3138
    for (instance_name, inst) in self.instances.items():
3139
      assert owned_nodes.issuperset(inst.all_nodes), \
3140
        "Instance %s's nodes changed while we kept the lock" % instance_name
3141

    
3142
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3143
                                             owned_groups)
3144

    
3145
      assert self.group_uuid in inst_groups, \
3146
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3147

    
3148
  def Exec(self, feedback_fn):
3149
    """Verify integrity of cluster disks.
3150

3151
    @rtype: tuple of three items
3152
    @return: a tuple of (dict of node-to-node_error, list of instances
3153
        which need activate-disks, dict of instance: (node, volume) for
3154
        missing volumes
3155

3156
    """
3157
    res_nodes = {}
3158
    res_instances = set()
3159
    res_missing = {}
3160

    
3161
    nv_dict = _MapInstanceDisksToNodes([inst
3162
                                        for inst in self.instances.values()
3163
                                        if inst.admin_up])
3164

    
3165
    if nv_dict:
3166
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3167
                             set(self.cfg.GetVmCapableNodeList()))
3168

    
3169
      node_lvs = self.rpc.call_lv_list(nodes, [])
3170

    
3171
      for (node, node_res) in node_lvs.items():
3172
        if node_res.offline:
3173
          continue
3174

    
3175
        msg = node_res.fail_msg
3176
        if msg:
3177
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3178
          res_nodes[node] = msg
3179
          continue
3180

    
3181
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3182
          inst = nv_dict.pop((node, lv_name), None)
3183
          if not (lv_online or inst is None):
3184
            res_instances.add(inst)
3185

    
3186
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3187
      # better
3188
      for key, inst in nv_dict.iteritems():
3189
        res_missing.setdefault(inst, []).append(list(key))
3190

    
3191
    return (res_nodes, list(res_instances), res_missing)
3192

    
3193

    
3194
class LUClusterRepairDiskSizes(NoHooksLU):
3195
  """Verifies the cluster disks sizes.
3196

3197
  """
3198
  REQ_BGL = False
3199

    
3200
  def ExpandNames(self):
3201
    if self.op.instances:
3202
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3203
      self.needed_locks = {
3204
        locking.LEVEL_NODE_RES: [],
3205
        locking.LEVEL_INSTANCE: self.wanted_names,
3206
        }
3207
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3208
    else:
3209
      self.wanted_names = None
3210
      self.needed_locks = {
3211
        locking.LEVEL_NODE_RES: locking.ALL_SET,
3212
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3213
        }
3214
    self.share_locks = _ShareAll()
3215

    
3216
  def DeclareLocks(self, level):
3217
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3218
      self._LockInstancesNodes(primary_only=True, level=level)
3219

    
3220
  def CheckPrereq(self):
3221
    """Check prerequisites.
3222

3223
    This only checks the optional instance list against the existing names.
3224

3225
    """
3226
    if self.wanted_names is None:
3227
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3228

    
3229
    self.wanted_instances = \
3230
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3231

    
3232
  def _EnsureChildSizes(self, disk):
3233
    """Ensure children of the disk have the needed disk size.
3234

3235
    This is valid mainly for DRBD8 and fixes an issue where the
3236
    children have smaller disk size.
3237

3238
    @param disk: an L{ganeti.objects.Disk} object
3239

3240
    """
3241
    if disk.dev_type == constants.LD_DRBD8:
3242
      assert disk.children, "Empty children for DRBD8?"
3243
      fchild = disk.children[0]
3244
      mismatch = fchild.size < disk.size
3245
      if mismatch:
3246
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3247
                     fchild.size, disk.size)
3248
        fchild.size = disk.size
3249

    
3250
      # and we recurse on this child only, not on the metadev
3251
      return self._EnsureChildSizes(fchild) or mismatch
3252
    else:
3253
      return False
3254

    
3255
  def Exec(self, feedback_fn):
3256
    """Verify the size of cluster disks.
3257

3258
    """
3259
    # TODO: check child disks too
3260
    # TODO: check differences in size between primary/secondary nodes
3261
    per_node_disks = {}
3262
    for instance in self.wanted_instances:
3263
      pnode = instance.primary_node
3264
      if pnode not in per_node_disks:
3265
        per_node_disks[pnode] = []
3266
      for idx, disk in enumerate(instance.disks):
3267
        per_node_disks[pnode].append((instance, idx, disk))
3268

    
3269
    assert not (frozenset(per_node_disks.keys()) -
3270
                self.owned_locks(locking.LEVEL_NODE_RES)), \
3271
      "Not owning correct locks"
3272
    assert not self.owned_locks(locking.LEVEL_NODE)
3273

    
3274
    changed = []
3275
    for node, dskl in per_node_disks.items():
3276
      newl = [v[2].Copy() for v in dskl]
3277
      for dsk in newl:
3278
        self.cfg.SetDiskID(dsk, node)
3279
      result = self.rpc.call_blockdev_getsize(node, newl)
3280
      if result.fail_msg:
3281
        self.LogWarning("Failure in blockdev_getsize call to node"
3282
                        " %s, ignoring", node)
3283
        continue
3284
      if len(result.payload) != len(dskl):
3285
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3286
                        " result.payload=%s", node, len(dskl), result.payload)
3287
        self.LogWarning("Invalid result from node %s, ignoring node results",
3288
                        node)
3289
        continue
3290
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3291
        if size is None:
3292
          self.LogWarning("Disk %d of instance %s did not return size"
3293
                          " information, ignoring", idx, instance.name)
3294
          continue
3295
        if not isinstance(size, (int, long)):
3296
          self.LogWarning("Disk %d of instance %s did not return valid"
3297
                          " size information, ignoring", idx, instance.name)
3298
          continue
3299
        size = size >> 20
3300
        if size != disk.size:
3301
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3302
                       " correcting: recorded %d, actual %d", idx,
3303
                       instance.name, disk.size, size)
3304
          disk.size = size
3305
          self.cfg.Update(instance, feedback_fn)
3306
          changed.append((instance.name, idx, size))
3307
        if self._EnsureChildSizes(disk):
3308
          self.cfg.Update(instance, feedback_fn)
3309
          changed.append((instance.name, idx, disk.size))
3310
    return changed
3311

    
3312

    
3313
class LUClusterRename(LogicalUnit):
3314
  """Rename the cluster.
3315

3316
  """
3317
  HPATH = "cluster-rename"
3318
  HTYPE = constants.HTYPE_CLUSTER
3319

    
3320
  def BuildHooksEnv(self):
3321
    """Build hooks env.
3322

3323
    """
3324
    return {
3325
      "OP_TARGET": self.cfg.GetClusterName(),
3326
      "NEW_NAME": self.op.name,
3327
      }
3328

    
3329
  def BuildHooksNodes(self):
3330
    """Build hooks nodes.
3331

3332
    """
3333
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3334

    
3335
  def CheckPrereq(self):
3336
    """Verify that the passed name is a valid one.
3337

3338
    """
3339
    hostname = netutils.GetHostname(name=self.op.name,
3340
                                    family=self.cfg.GetPrimaryIPFamily())
3341

    
3342
    new_name = hostname.name
3343
    self.ip = new_ip = hostname.ip
3344
    old_name = self.cfg.GetClusterName()
3345
    old_ip = self.cfg.GetMasterIP()
3346
    if new_name == old_name and new_ip == old_ip:
3347
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3348
                                 " cluster has changed",
3349
                                 errors.ECODE_INVAL)
3350
    if new_ip != old_ip:
3351
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3352
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3353
                                   " reachable on the network" %
3354
                                   new_ip, errors.ECODE_NOTUNIQUE)
3355

    
3356
    self.op.name = new_name
3357

    
3358
  def Exec(self, feedback_fn):
3359
    """Rename the cluster.
3360

3361
    """
3362
    clustername = self.op.name
3363
    new_ip = self.ip
3364

    
3365
    # shutdown the master IP
3366
    master_params = self.cfg.GetMasterNetworkParameters()
3367
    ems = self.cfg.GetUseExternalMipScript()
3368
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3369
                                                     master_params, ems)
3370
    result.Raise("Could not disable the master role")
3371

    
3372
    try:
3373
      cluster = self.cfg.GetClusterInfo()
3374
      cluster.cluster_name = clustername
3375
      cluster.master_ip = new_ip
3376
      self.cfg.Update(cluster, feedback_fn)
3377

    
3378
      # update the known hosts file
3379
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3380
      node_list = self.cfg.GetOnlineNodeList()
3381
      try:
3382
        node_list.remove(master_params.name)
3383
      except ValueError:
3384
        pass
3385
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3386
    finally:
3387
      master_params.ip = new_ip
3388
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3389
                                                     master_params, ems)
3390
      msg = result.fail_msg
3391
      if msg:
3392
        self.LogWarning("Could not re-enable the master role on"
3393
                        " the master, please restart manually: %s", msg)
3394

    
3395
    return clustername
3396

    
3397

    
3398
def _ValidateNetmask(cfg, netmask):
3399
  """Checks if a netmask is valid.
3400

3401
  @type cfg: L{config.ConfigWriter}
3402
  @param cfg: The cluster configuration
3403
  @type netmask: int
3404
  @param netmask: the netmask to be verified
3405
  @raise errors.OpPrereqError: if the validation fails
3406

3407
  """
3408
  ip_family = cfg.GetPrimaryIPFamily()
3409
  try:
3410
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3411
  except errors.ProgrammerError:
3412
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
3413
                               ip_family)
3414
  if not ipcls.ValidateNetmask(netmask):
3415
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3416
                                (netmask))
3417

    
3418

    
3419
class LUClusterSetParams(LogicalUnit):
3420
  """Change the parameters of the cluster.
3421

3422
  """
3423
  HPATH = "cluster-modify"
3424
  HTYPE = constants.HTYPE_CLUSTER
3425
  REQ_BGL = False
3426

    
3427
  def CheckArguments(self):
3428
    """Check parameters
3429

3430
    """
3431
    if self.op.uid_pool:
3432
      uidpool.CheckUidPool(self.op.uid_pool)
3433

    
3434
    if self.op.add_uids:
3435
      uidpool.CheckUidPool(self.op.add_uids)
3436

    
3437
    if self.op.remove_uids:
3438
      uidpool.CheckUidPool(self.op.remove_uids)
3439

    
3440
    if self.op.master_netmask is not None:
3441
      _ValidateNetmask(self.cfg, self.op.master_netmask)
3442

    
3443
  def ExpandNames(self):
3444
    # FIXME: in the future maybe other cluster params won't require checking on
3445
    # all nodes to be modified.
3446
    self.needed_locks = {
3447
      locking.LEVEL_NODE: locking.ALL_SET,
3448
    }
3449
    self.share_locks[locking.LEVEL_NODE] = 1
3450

    
3451
  def BuildHooksEnv(self):
3452
    """Build hooks env.
3453

3454
    """
3455
    return {
3456
      "OP_TARGET": self.cfg.GetClusterName(),
3457
      "NEW_VG_NAME": self.op.vg_name,
3458
      }
3459

    
3460
  def BuildHooksNodes(self):
3461
    """Build hooks nodes.
3462

3463
    """
3464
    mn = self.cfg.GetMasterNode()
3465
    return ([mn], [mn])
3466

    
3467
  def CheckPrereq(self):
3468
    """Check prerequisites.
3469

3470
    This checks whether the given params don't conflict and
3471
    if the given volume group is valid.
3472

3473
    """
3474
    if self.op.vg_name is not None and not self.op.vg_name:
3475
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3476
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3477
                                   " instances exist", errors.ECODE_INVAL)
3478

    
3479
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3480
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3481
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3482
                                   " drbd-based instances exist",
3483
                                   errors.ECODE_INVAL)
3484

    
3485
    node_list = self.owned_locks(locking.LEVEL_NODE)
3486

    
3487
    # if vg_name not None, checks given volume group on all nodes
3488
    if self.op.vg_name:
3489
      vglist = self.rpc.call_vg_list(node_list)
3490
      for node in node_list:
3491
        msg = vglist[node].fail_msg
3492
        if msg:
3493
          # ignoring down node
3494
          self.LogWarning("Error while gathering data on node %s"
3495
                          " (ignoring node): %s", node, msg)
3496
          continue
3497
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3498
                                              self.op.vg_name,
3499
                                              constants.MIN_VG_SIZE)
3500
        if vgstatus:
3501
          raise errors.OpPrereqError("Error on node '%s': %s" %
3502
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3503

    
3504
    if self.op.drbd_helper:
3505
      # checks given drbd helper on all nodes
3506
      helpers = self.rpc.call_drbd_helper(node_list)
3507
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3508
        if ninfo.offline:
3509
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3510
          continue
3511
        msg = helpers[node].fail_msg
3512
        if msg:
3513
          raise errors.OpPrereqError("Error checking drbd helper on node"
3514
                                     " '%s': %s" % (node, msg),
3515
                                     errors.ECODE_ENVIRON)
3516
        node_helper = helpers[node].payload
3517
        if node_helper != self.op.drbd_helper:
3518
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3519
                                     (node, node_helper), errors.ECODE_ENVIRON)
3520

    
3521
    self.cluster = cluster = self.cfg.GetClusterInfo()
3522
    # validate params changes
3523
    if self.op.beparams:
3524
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3525
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3526

    
3527
    if self.op.ndparams:
3528
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3529
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3530

    
3531
      # TODO: we need a more general way to handle resetting
3532
      # cluster-level parameters to default values
3533
      if self.new_ndparams["oob_program"] == "":
3534
        self.new_ndparams["oob_program"] = \
3535
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3536

    
3537
    if self.op.nicparams:
3538
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3539
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3540
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3541
      nic_errors = []
3542

    
3543
      # check all instances for consistency
3544
      for instance in self.cfg.GetAllInstancesInfo().values():
3545
        for nic_idx, nic in enumerate(instance.nics):
3546
          params_copy = copy.deepcopy(nic.nicparams)
3547
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3548

    
3549
          # check parameter syntax
3550
          try:
3551
            objects.NIC.CheckParameterSyntax(params_filled)
3552
          except errors.ConfigurationError, err:
3553
            nic_errors.append("Instance %s, nic/%d: %s" %
3554
                              (instance.name, nic_idx, err))
3555

    
3556
          # if we're moving instances to routed, check that they have an ip
3557
          target_mode = params_filled[constants.NIC_MODE]
3558
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3559
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3560
                              " address" % (instance.name, nic_idx))
3561
      if nic_errors:
3562
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3563
                                   "\n".join(nic_errors))
3564

    
3565
    # hypervisor list/parameters
3566
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3567
    if self.op.hvparams:
3568
      for hv_name, hv_dict in self.op.hvparams.items():
3569
        if hv_name not in self.new_hvparams:
3570
          self.new_hvparams[hv_name] = hv_dict
3571
        else:
3572
          self.new_hvparams[hv_name].update(hv_dict)
3573

    
3574
    # os hypervisor parameters
3575
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3576
    if self.op.os_hvp:
3577
      for os_name, hvs in self.op.os_hvp.items():
3578
        if os_name not in self.new_os_hvp:
3579
          self.new_os_hvp[os_name] = hvs
3580
        else:
3581
          for hv_name, hv_dict in hvs.items():
3582
            if hv_name not in self.new_os_hvp[os_name]:
3583
              self.new_os_hvp[os_name][hv_name] = hv_dict
3584
            else:
3585
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3586

    
3587
    # os parameters
3588
    self.new_osp = objects.FillDict(cluster.osparams, {})
3589
    if self.op.osparams:
3590
      for os_name, osp in self.op.osparams.items():
3591
        if os_name not in self.new_osp:
3592
          self.new_osp[os_name] = {}
3593

    
3594
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3595
                                                  use_none=True)
3596

    
3597
        if not self.new_osp[os_name]:
3598
          # we removed all parameters
3599
          del self.new_osp[os_name]
3600
        else:
3601
          # check the parameter validity (remote check)
3602
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3603
                         os_name, self.new_osp[os_name])
3604

    
3605
    # changes to the hypervisor list
3606
    if self.op.enabled_hypervisors is not None:
3607
      self.hv_list = self.op.enabled_hypervisors
3608
      for hv in self.hv_list:
3609
        # if the hypervisor doesn't already exist in the cluster
3610
        # hvparams, we initialize it to empty, and then (in both
3611
        # cases) we make sure to fill the defaults, as we might not
3612
        # have a complete defaults list if the hypervisor wasn't
3613
        # enabled before
3614
        if hv not in new_hvp:
3615
          new_hvp[hv] = {}
3616
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3617
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3618
    else:
3619
      self.hv_list = cluster.enabled_hypervisors
3620

    
3621
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3622
      # either the enabled list has changed, or the parameters have, validate
3623
      for hv_name, hv_params in self.new_hvparams.items():
3624
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3625
            (self.op.enabled_hypervisors and
3626
             hv_name in self.op.enabled_hypervisors)):
3627
          # either this is a new hypervisor, or its parameters have changed
3628
          hv_class = hypervisor.GetHypervisor(hv_name)
3629
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3630
          hv_class.CheckParameterSyntax(hv_params)
3631
          _CheckHVParams(self, node_list, hv_name, hv_params)
3632

    
3633
    if self.op.os_hvp:
3634
      # no need to check any newly-enabled hypervisors, since the
3635
      # defaults have already been checked in the above code-block
3636
      for os_name, os_hvp in self.new_os_hvp.items():
3637
        for hv_name, hv_params in os_hvp.items():
3638
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3639
          # we need to fill in the new os_hvp on top of the actual hv_p
3640
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3641
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3642
          hv_class = hypervisor.GetHypervisor(hv_name)
3643
          hv_class.CheckParameterSyntax(new_osp)
3644
          _CheckHVParams(self, node_list, hv_name, new_osp)
3645

    
3646
    if self.op.default_iallocator:
3647
      alloc_script = utils.FindFile(self.op.default_iallocator,
3648
                                    constants.IALLOCATOR_SEARCH_PATH,
3649
                                    os.path.isfile)
3650
      if alloc_script is None:
3651
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3652
                                   " specified" % self.op.default_iallocator,
3653
                                   errors.ECODE_INVAL)
3654

    
3655
  def Exec(self, feedback_fn):
3656
    """Change the parameters of the cluster.
3657

3658
    """
3659
    if self.op.vg_name is not None:
3660
      new_volume = self.op.vg_name
3661
      if not new_volume:
3662
        new_volume = None
3663
      if new_volume != self.cfg.GetVGName():
3664
        self.cfg.SetVGName(new_volume)
3665
      else:
3666
        feedback_fn("Cluster LVM configuration already in desired"
3667
                    " state, not changing")
3668
    if self.op.drbd_helper is not None:
3669
      new_helper = self.op.drbd_helper
3670
      if not new_helper:
3671
        new_helper = None
3672
      if new_helper != self.cfg.GetDRBDHelper():
3673
        self.cfg.SetDRBDHelper(new_helper)
3674
      else:
3675
        feedback_fn("Cluster DRBD helper already in desired state,"
3676
                    " not changing")
3677
    if self.op.hvparams:
3678
      self.cluster.hvparams = self.new_hvparams
3679
    if self.op.os_hvp:
3680
      self.cluster.os_hvp = self.new_os_hvp
3681
    if self.op.enabled_hypervisors is not None:
3682
      self.cluster.hvparams = self.new_hvparams
3683
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3684
    if self.op.beparams:
3685
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3686
    if self.op.nicparams:
3687
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3688
    if self.op.osparams:
3689
      self.cluster.osparams = self.new_osp
3690
    if self.op.ndparams:
3691
      self.cluster.ndparams = self.new_ndparams
3692

    
3693
    if self.op.candidate_pool_size is not None:
3694
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3695
      # we need to update the pool size here, otherwise the save will fail
3696
      _AdjustCandidatePool(self, [])
3697

    
3698
    if self.op.maintain_node_health is not None:
3699
      self.cluster.maintain_node_health = self.op.maintain_node_health
3700

    
3701
    if self.op.prealloc_wipe_disks is not None:
3702
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3703

    
3704
    if self.op.add_uids is not None:
3705
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3706

    
3707
    if self.op.remove_uids is not None:
3708
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3709

    
3710
    if self.op.uid_pool is not None:
3711
      self.cluster.uid_pool = self.op.uid_pool
3712

    
3713
    if self.op.default_iallocator is not None:
3714
      self.cluster.default_iallocator = self.op.default_iallocator
3715

    
3716
    if self.op.reserved_lvs is not None:
3717
      self.cluster.reserved_lvs = self.op.reserved_lvs
3718

    
3719
    if self.op.use_external_mip_script is not None:
3720
      self.cluster.use_external_mip_script = self.op.use_external_mip_script
3721

    
3722
    def helper_os(aname, mods, desc):
3723
      desc += " OS list"
3724
      lst = getattr(self.cluster, aname)
3725
      for key, val in mods:
3726
        if key == constants.DDM_ADD:
3727
          if val in lst:
3728
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3729
          else:
3730
            lst.append(val)
3731
        elif key == constants.DDM_REMOVE:
3732
          if val in lst:
3733
            lst.remove(val)
3734
          else:
3735
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3736
        else:
3737
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3738

    
3739
    if self.op.hidden_os:
3740
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3741

    
3742
    if self.op.blacklisted_os:
3743
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3744

    
3745
    if self.op.master_netdev:
3746
      master_params = self.cfg.GetMasterNetworkParameters()
3747
      ems = self.cfg.GetUseExternalMipScript()
3748
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3749
                  self.cluster.master_netdev)
3750
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3751
                                                       master_params, ems)
3752
      result.Raise("Could not disable the master ip")
3753
      feedback_fn("Changing master_netdev from %s to %s" %
3754
                  (master_params.netdev, self.op.master_netdev))
3755
      self.cluster.master_netdev = self.op.master_netdev
3756

    
3757
    if self.op.master_netmask:
3758
      master_params = self.cfg.GetMasterNetworkParameters()
3759
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3760
      result = self.rpc.call_node_change_master_netmask(master_params.name,
3761
                                                        master_params.netmask,
3762
                                                        self.op.master_netmask,
3763
                                                        master_params.ip,
3764
                                                        master_params.netdev)
3765
      if result.fail_msg:
3766
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
3767
        feedback_fn(msg)
3768

    
3769
      self.cluster.master_netmask = self.op.master_netmask
3770

    
3771
    self.cfg.Update(self.cluster, feedback_fn)
3772

    
3773
    if self.op.master_netdev:
3774
      master_params = self.cfg.GetMasterNetworkParameters()
3775
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3776
                  self.op.master_netdev)
3777
      ems = self.cfg.GetUseExternalMipScript()
3778
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3779
                                                     master_params, ems)
3780
      if result.fail_msg:
3781
        self.LogWarning("Could not re-enable the master ip on"
3782
                        " the master, please restart manually: %s",
3783
                        result.fail_msg)
3784

    
3785

    
3786
def _UploadHelper(lu, nodes, fname):
3787
  """Helper for uploading a file and showing warnings.
3788

3789
  """
3790
  if os.path.exists(fname):
3791
    result = lu.rpc.call_upload_file(nodes, fname)
3792
    for to_node, to_result in result.items():
3793
      msg = to_result.fail_msg
3794
      if msg:
3795
        msg = ("Copy of file %s to node %s failed: %s" %
3796
               (fname, to_node, msg))
3797
        lu.proc.LogWarning(msg)
3798

    
3799

    
3800
def _ComputeAncillaryFiles(cluster, redist):
3801
  """Compute files external to Ganeti which need to be consistent.
3802

3803
  @type redist: boolean
3804
  @param redist: Whether to include files which need to be redistributed
3805

3806
  """
3807
  # Compute files for all nodes
3808
  files_all = set([
3809
    constants.SSH_KNOWN_HOSTS_FILE,
3810
    constants.CONFD_HMAC_KEY,
3811
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3812
    constants.SPICE_CERT_FILE,
3813
    constants.SPICE_CACERT_FILE,
3814
    constants.RAPI_USERS_FILE,
3815
    ])
3816

    
3817
  if not redist:
3818
    files_all.update(constants.ALL_CERT_FILES)
3819
    files_all.update(ssconf.SimpleStore().GetFileList())
3820
  else:
3821
    # we need to ship at least the RAPI certificate
3822
    files_all.add(constants.RAPI_CERT_FILE)
3823

    
3824
  if cluster.modify_etc_hosts:
3825
    files_all.add(constants.ETC_HOSTS)
3826

    
3827
  # Files which are optional, these must:
3828
  # - be present in one other category as well
3829
  # - either exist or not exist on all nodes of that category (mc, vm all)
3830
  files_opt = set([
3831
    constants.RAPI_USERS_FILE,
3832
    ])
3833

    
3834
  # Files which should only be on master candidates
3835
  files_mc = set()
3836

    
3837
  if not redist:
3838
    files_mc.add(constants.CLUSTER_CONF_FILE)
3839

    
3840
    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3841
    # replication
3842
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
3843

    
3844
  # Files which should only be on VM-capable nodes
3845
  files_vm = set(filename
3846
    for hv_name in cluster.enabled_hypervisors
3847
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3848

    
3849
  files_opt |= set(filename
3850
    for hv_name in cluster.enabled_hypervisors
3851
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3852

    
3853
  # Filenames in each category must be unique
3854
  all_files_set = files_all | files_mc | files_vm
3855
  assert (len(all_files_set) ==
3856
          sum(map(len, [files_all, files_mc, files_vm]))), \
3857
         "Found file listed in more than one file list"
3858

    
3859
  # Optional files must be present in one other category
3860
  assert all_files_set.issuperset(files_opt), \
3861
         "Optional file not in a different required list"
3862

    
3863
  return (files_all, files_opt, files_mc, files_vm)
3864

    
3865

    
3866
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3867
  """Distribute additional files which are part of the cluster configuration.
3868

3869
  ConfigWriter takes care of distributing the config and ssconf files, but
3870
  there are more files which should be distributed to all nodes. This function
3871
  makes sure those are copied.
3872

3873
  @param lu: calling logical unit
3874
  @param additional_nodes: list of nodes not in the config to distribute to
3875
  @type additional_vm: boolean
3876
  @param additional_vm: whether the additional nodes are vm-capable or not
3877

3878
  """
3879
  # Gather target nodes
3880
  cluster = lu.cfg.GetClusterInfo()
3881
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3882

    
3883
  online_nodes = lu.cfg.GetOnlineNodeList()
3884
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3885

    
3886
  if additional_nodes is not None:
3887
    online_nodes.extend(additional_nodes)
3888
    if additional_vm:
3889
      vm_nodes.extend(additional_nodes)
3890

    
3891
  # Never distribute to master node
3892
  for nodelist in [online_nodes, vm_nodes]:
3893
    if master_info.name in nodelist:
3894
      nodelist.remove(master_info.name)
3895

    
3896
  # Gather file lists
3897
  (files_all, _, files_mc, files_vm) = \
3898
    _ComputeAncillaryFiles(cluster, True)
3899

    
3900
  # Never re-distribute configuration file from here
3901
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3902
              constants.CLUSTER_CONF_FILE in files_vm)
3903
  assert not files_mc, "Master candidates not handled in this function"
3904

    
3905
  filemap = [
3906
    (online_nodes, files_all),
3907
    (vm_nodes, files_vm),
3908
    ]
3909

    
3910
  # Upload the files
3911
  for (node_list, files) in filemap:
3912
    for fname in files:
3913
      _UploadHelper(lu, node_list, fname)
3914

    
3915

    
3916
class LUClusterRedistConf(NoHooksLU):
3917
  """Force the redistribution of cluster configuration.
3918

3919
  This is a very simple LU.
3920

3921
  """
3922
  REQ_BGL = False
3923

    
3924
  def ExpandNames(self):
3925
    self.needed_locks = {
3926
      locking.LEVEL_NODE: locking.ALL_SET,
3927
    }
3928
    self.share_locks[locking.LEVEL_NODE] = 1
3929

    
3930
  def Exec(self, feedback_fn):
3931
    """Redistribute the configuration.
3932

3933
    """
3934
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3935
    _RedistributeAncillaryFiles(self)
3936

    
3937

    
3938
class LUClusterActivateMasterIp(NoHooksLU):
3939
  """Activate the master IP on the master node.
3940

3941
  """
3942
  def Exec(self, feedback_fn):
3943
    """Activate the master IP.
3944

3945
    """
3946
    master_params = self.cfg.GetMasterNetworkParameters()
3947
    ems = self.cfg.GetUseExternalMipScript()
3948
    self.rpc.call_node_activate_master_ip(master_params.name,
3949
                                          master_params, ems)
3950

    
3951

    
3952
class LUClusterDeactivateMasterIp(NoHooksLU):
3953
  """Deactivate the master IP on the master node.
3954

3955
  """
3956
  def Exec(self, feedback_fn):
3957
    """Deactivate the master IP.
3958

3959
    """
3960
    master_params = self.cfg.GetMasterNetworkParameters()
3961
    ems = self.cfg.GetUseExternalMipScript()
3962
    self.rpc.call_node_deactivate_master_ip(master_params.name, master_params,
3963
                                            ems)
3964

    
3965

    
3966
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3967
  """Sleep and poll for an instance's disk to sync.
3968

3969
  """
3970
  if not instance.disks or disks is not None and not disks:
3971
    return True
3972

    
3973
  disks = _ExpandCheckDisks(instance, disks)
3974

    
3975
  if not oneshot:
3976
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3977

    
3978
  node = instance.primary_node
3979

    
3980
  for dev in disks:
3981
    lu.cfg.SetDiskID(dev, node)
3982

    
3983
  # TODO: Convert to utils.Retry
3984

    
3985
  retries = 0
3986
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3987
  while True:
3988
    max_time = 0
3989
    done = True
3990
    cumul_degraded = False
3991
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3992
    msg = rstats.fail_msg
3993
    if msg:
3994
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3995
      retries += 1
3996
      if retries >= 10:
3997
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3998
                                 " aborting." % node)
3999
      time.sleep(6)
4000
      continue
4001
    rstats = rstats.payload
4002
    retries = 0
4003
    for i, mstat in enumerate(rstats):
4004
      if mstat is None:
4005
        lu.LogWarning("Can't compute data for node %s/%s",
4006
                           node, disks[i].iv_name)
4007
        continue
4008

    
4009
      cumul_degraded = (cumul_degraded or
4010
                        (mstat.is_degraded and mstat.sync_percent is None))
4011
      if mstat.sync_percent is not None:
4012
        done = False
4013
        if mstat.estimated_time is not None:
4014
          rem_time = ("%s remaining (estimated)" %
4015
                      utils.FormatSeconds(mstat.estimated_time))
4016
          max_time = mstat.estimated_time
4017
        else:
4018
          rem_time = "no time estimate"
4019
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4020
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
4021

    
4022
    # if we're done but degraded, let's do a few small retries, to
4023
    # make sure we see a stable and not transient situation; therefore
4024
    # we force restart of the loop
4025
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
4026
      logging.info("Degraded disks found, %d retries left", degr_retries)
4027
      degr_retries -= 1
4028
      time.sleep(1)
4029
      continue
4030

    
4031
    if done or oneshot:
4032
      break
4033

    
4034
    time.sleep(min(60, max_time))
4035

    
4036
  if done:
4037
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4038
  return not cumul_degraded
4039

    
4040

    
4041
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4042
  """Check that mirrors are not degraded.
4043

4044
  The ldisk parameter, if True, will change the test from the
4045
  is_degraded attribute (which represents overall non-ok status for
4046
  the device(s)) to the ldisk (representing the local storage status).
4047

4048
  """
4049
  lu.cfg.SetDiskID(dev, node)
4050

    
4051
  result = True
4052

    
4053
  if on_primary or dev.AssembleOnSecondary():
4054
    rstats = lu.rpc.call_blockdev_find(node, dev)
4055
    msg = rstats.fail_msg
4056
    if msg:
4057
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4058
      result = False
4059
    elif not rstats.payload:
4060
      lu.LogWarning("Can't find disk on node %s", node)
4061
      result = False
4062
    else:
4063
      if ldisk:
4064
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4065
      else:
4066
        result = result and not rstats.payload.is_degraded
4067

    
4068
  if dev.children:
4069
    for child in dev.children:
4070
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4071

    
4072
  return result
4073

    
4074

    
4075
class LUOobCommand(NoHooksLU):
4076
  """Logical unit for OOB handling.
4077

4078
  """
4079
  REG_BGL = False
4080
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4081

    
4082
  def ExpandNames(self):
4083
    """Gather locks we need.
4084

4085
    """
4086
    if self.op.node_names:
4087
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4088
      lock_names = self.op.node_names
4089
    else:
4090
      lock_names = locking.ALL_SET
4091

    
4092
    self.needed_locks = {
4093
      locking.LEVEL_NODE: lock_names,
4094
      }
4095

    
4096
  def CheckPrereq(self):
4097
    """Check prerequisites.
4098

4099
    This checks:
4100
     - the node exists in the configuration
4101
     - OOB is supported
4102

4103
    Any errors are signaled by raising errors.OpPrereqError.
4104

4105
    """
4106
    self.nodes = []
4107
    self.master_node = self.cfg.GetMasterNode()
4108

    
4109
    assert self.op.power_delay >= 0.0
4110

    
4111
    if self.op.node_names:
4112
      if (self.op.command in self._SKIP_MASTER and
4113
          self.master_node in self.op.node_names):
4114
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4115
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4116

    
4117
        if master_oob_handler:
4118
          additional_text = ("run '%s %s %s' if you want to operate on the"
4119
                             " master regardless") % (master_oob_handler,
4120
                                                      self.op.command,
4121
                                                      self.master_node)
4122
        else:
4123
          additional_text = "it does not support out-of-band operations"
4124

    
4125
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4126
                                    " allowed for %s; %s") %
4127
                                   (self.master_node, self.op.command,
4128
                                    additional_text), errors.ECODE_INVAL)
4129
    else:
4130
      self.op.node_names = self.cfg.GetNodeList()
4131
      if self.op.command in self._SKIP_MASTER:
4132
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


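# NOTE: The helper below is an illustrative sketch added for documentation
# purposes only; it is not used anywhere in this module. It restates the
# payload shapes checked by LUOobCommand._CheckPayload above: "health"
# returns a list of (item, status) pairs, "power-status" a dict, and the
# power-changing commands no payload at all.
def _ExampleCheckOobPayload(command, payload):
  """Return a list of error strings for an OOB payload (hypothetical sketch).

  """
  errs = []
  if command == constants.OOB_HEALTH:
    if not isinstance(payload, list):
      errs.append("expected a list, got %s" % type(payload))
    else:
      errs.extend("invalid status %r for item %r" % (status, item)
                  for (item, status) in payload
                  if status not in constants.OOB_STATUSES)
  elif command == constants.OOB_POWER_STATUS:
    if not isinstance(payload, dict):
      errs.append("expected a dict, got %s" % type(payload))
  elif payload is not None:
    errs.append("expected no payload, got %r" % (payload, ))
  return errs

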
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


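# NOTE: Illustrative sketch only (not used by the module): the per-node to
# per-OS remapping performed by _OsQuery._DiagnoseByOS above, shown on plain
# dictionaries instead of RPC result objects. The input maps node names to
# lists of (name, path, status, diagnose, variants, params, api_versions)
# tuples; the simplified output maps OS names to {node: [(path, ...)]}.
def _ExampleRemapOsList(node_oses):
  all_os = {}
  for (node_name, os_list) in node_oses.items():
    for (name, path, status, diagnose, variants, params, api_versions) in \
        os_list:
      per_node = all_os.setdefault(name, dict((n, []) for n in node_oses))
      per_node[node_name].append((path, status, diagnose, variants,
                                  [tuple(v) for v in params], api_versions))
  return all_os

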
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


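# NOTE: Hypothetical helper added as an illustration only: the filter that
# LUOsDiagnose._BuildFilter above composes when none of "hidden",
# "blacklisted" or "valid" is among the requested fields (the legacy
# behaviour of hiding such OSes).
def _ExampleDefaultOsFilter(names):
  status_filter = [qlang.OP_AND,
                   [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
                   [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
                   [qlang.OP_TRUE, "valid"]]
  name_filter = qlang.MakeSimpleFilter("name", names)
  if name_filter:
    return [qlang.OP_AND, name_filter, status_filter]
  return status_filter

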
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


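# NOTE: Illustrative sketch (not used by the module): the per-node instance
# mapping built by _NodeQuery._GetQueryData above when NQ_INST is requested,
# reduced to plain data structures.
def _ExampleMapInstancesToNodes(nodenames, instances):
  node_to_primary = dict((name, set()) for name in nodenames)
  node_to_secondary = dict((name, set()) for name in nodenames)
  for inst in instances:
    if inst.primary_node in node_to_primary:
      node_to_primary[inst.primary_node].add(inst.name)
    for secnode in inst.secondary_nodes:
      if secnode in node_to_secondary:
        node_to_secondary[secnode].add(inst.name)
  return (node_to_primary, node_to_secondary)

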
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


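# NOTE: Hypothetical one-line helper, shown only to document the lookup key
# used against the _MapInstanceDisksToNodes() result in LUNodeQueryvols.Exec
# above: the owning instance of a volume is found under (node, "<vg>/<name>").
def _ExampleVolumeOwner(vol2inst, node, vol):
  return vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")

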
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


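# NOTE: Illustrative sketch (not used by the module): the per-instance disk
# usage figure returned by _InstanceQuery._GetQueryData above for
# IQ_DISKUSAGE is derived from the disk template and the disk sizes only.
def _ExampleInstanceDiskUsage(inst):
  return _ComputeDiskSize(inst.disk_template,
                          [{constants.IDISK_SIZE: disk.size}
                           for disk in inst.disks])

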
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


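# NOTE: Hypothetical helper, added purely as an illustration of the field
# whitelist check done by LUNodeModifyStorage.CheckArguments above.
def _ExampleCheckModifiableStorageFields(storage_type, changes):
  modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
  diff = set(changes) - modifiable
  if diff:
    raise errors.OpPrereqError("Cannot modify fields %s for storage type"
                               " '%s'" % (utils.CommaJoin(diff), storage_type),
                               errors.ECODE_INVAL)

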
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


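# NOTE: Illustrative sketch (not used by the module): the single- vs
# dual-homed consistency rule enforced by LUNodeAdd.CheckPrereq above; a new
# node must have a secondary IP exactly when the master has one.
def _ExampleCheckHomedness(master, primary_ip, secondary_ip):
  master_singlehomed = master.secondary_ip == master.primary_ip
  newbie_singlehomed = secondary_ip == primary_ip
  if master_singlehomed != newbie_singlehomed:
    if master_singlehomed:
      raise errors.OpPrereqError("The master has no secondary ip but the"
                                 " new node has one", errors.ECODE_INVAL)
    raise errors.OpPrereqError("The master has a secondary ip but the"
                               " new node doesn't have one",
                               errors.ECODE_INVAL)

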
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      # TODO: Use standard RPC runner, but make sure it works when the node is
      # still marked offline
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          raise errors.OpPrereqError("Cannot change secondary IP address:"
                                     " offline node has instances (%s)"
                                     " configured to use it" %
                                     utils.CommaJoin(affected_instances.keys()))
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


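# NOTE: Hypothetical example, for documentation only: node roles in
# LUNodeSetParams above are derived from the (master_candidate, drained,
# offline) flag tuple via the _F2R mapping, and _R2F inverts it.
def _ExampleNodeRole(node):
  flags = (node.master_candidate, node.drained, node.offline)
  # e.g. (False, False, False) maps to LUNodeSetParams._ROLE_REGULAR
  return LUNodeSetParams._F2R[flags]  # pylint: disable=W0212

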
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
5592
  """Query cluster configuration.
5593

5594
  """
5595
  REQ_BGL = False
5596

    
5597
  def ExpandNames(self):
5598
    self.needed_locks = {}
5599

    
5600
  def Exec(self, feedback_fn):
5601
    """Return cluster config.
5602

5603
    """
5604
    cluster = self.cfg.GetClusterInfo()
5605
    os_hvp = {}
5606

    
5607
    # Filter just for enabled hypervisors
5608
    for os_name, hv_dict in cluster.os_hvp.items():
5609
      os_hvp[os_name] = {}
5610
      for hv_name, hv_params in hv_dict.items():
5611
        if hv_name in cluster.enabled_hypervisors:
5612
          os_hvp[os_name][hv_name] = hv_params
5613

    
5614
    # Convert ip_family to ip_version
5615
    primary_ip_version = constants.IP4_VERSION
5616
    if cluster.primary_ip_family == netutils.IP6Address.family:
5617
      primary_ip_version = constants.IP6_VERSION
5618

    
5619
    result = {
5620
      "software_version": constants.RELEASE_VERSION,
5621
      "protocol_version": constants.PROTOCOL_VERSION,
5622
      "config_version": constants.CONFIG_VERSION,
5623
      "os_api_version": max(constants.OS_API_VERSIONS),
5624
      "export_version": constants.EXPORT_VERSION,
5625
      "architecture": (platform.architecture()[0], platform.machine()),
5626
      "name": cluster.cluster_name,
5627
      "master": cluster.master_node,
5628
      "default_hypervisor": cluster.enabled_hypervisors[0],
5629
      "enabled_hypervisors": cluster.enabled_hypervisors,
5630
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5631
                        for hypervisor_name in cluster.enabled_hypervisors]),
5632
      "os_hvp": os_hvp,
5633
      "beparams": cluster.beparams,
5634
      "osparams": cluster.osparams,
5635
      "nicparams": cluster.nicparams,
5636
      "ndparams": cluster.ndparams,
5637
      "candidate_pool_size": cluster.candidate_pool_size,
5638
      "master_netdev": cluster.master_netdev,
5639
      "master_netmask": cluster.master_netmask,
5640
      "use_external_mip_script": cluster.use_external_mip_script,
5641
      "volume_group_name": cluster.volume_group_name,
5642
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5643
      "file_storage_dir": cluster.file_storage_dir,
5644
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5645
      "maintain_node_health": cluster.maintain_node_health,
5646
      "ctime": cluster.ctime,
5647
      "mtime": cluster.mtime,
5648
      "uuid": cluster.uuid,
5649
      "tags": list(cluster.GetTags()),
5650
      "uid_pool": cluster.uid_pool,
5651
      "default_iallocator": cluster.default_iallocator,
5652
      "reserved_lvs": cluster.reserved_lvs,
5653
      "primary_ip_version": primary_ip_version,
5654
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5655
      "hidden_os": cluster.hidden_os,
5656
      "blacklisted_os": cluster.blacklisted_os,
5657
      }
5658

    
5659
    return result
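    # The nested loop above that builds os_hvp is equivalent to the following
    # dict-based sketch (illustrative only, not used by this LU):
    #
    #   os_hvp = dict((os_name,
    #                  dict((hv, params) for (hv, params) in hv_dict.items()
    #                       if hv in cluster.enabled_hypervisors))
    #                 for (os_name, hv_dict) in cluster.os_hvp.items())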
5660

    
5661

    
5662
class LUClusterConfigQuery(NoHooksLU):
5663
  """Return configuration values.
5664

5665
  """
5666
  REQ_BGL = False
5667
  _FIELDS_DYNAMIC = utils.FieldSet()
5668
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5669
                                  "watcher_pause", "volume_group_name")
5670

    
5671
  def CheckArguments(self):
5672
    _CheckOutputFields(static=self._FIELDS_STATIC,
5673
                       dynamic=self._FIELDS_DYNAMIC,
5674
                       selected=self.op.output_fields)
5675

    
5676
  def ExpandNames(self):
5677
    self.needed_locks = {}
5678

    
5679
  def Exec(self, feedback_fn):
5680
    """Dump a representation of the cluster config to the standard output.
5681

5682
    """
5683
    values = []
5684
    for field in self.op.output_fields:
5685
      if field == "cluster_name":
5686
        entry = self.cfg.GetClusterName()
5687
      elif field == "master_node":
5688
        entry = self.cfg.GetMasterNode()
5689
      elif field == "drain_flag":
5690
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5691
      elif field == "watcher_pause":
5692
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5693
      elif field == "volume_group_name":
5694
        entry = self.cfg.GetVGName()
5695
      else:
5696
        raise errors.ParameterError(field)
5697
      values.append(entry)
5698
    return values
5699

    
5700

    
5701
class LUInstanceActivateDisks(NoHooksLU):
5702
  """Bring up an instance's disks.
5703

5704
  """
5705
  REQ_BGL = False
5706

    
5707
  def ExpandNames(self):
5708
    self._ExpandAndLockInstance()
5709
    self.needed_locks[locking.LEVEL_NODE] = []
5710
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5711

    
5712
  def DeclareLocks(self, level):
5713
    if level == locking.LEVEL_NODE:
5714
      self._LockInstancesNodes()
5715

    
5716
  def CheckPrereq(self):
5717
    """Check prerequisites.
5718

5719
    This checks that the instance is in the cluster.
5720

5721
    """
5722
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5723
    assert self.instance is not None, \
5724
      "Cannot retrieve locked instance %s" % self.op.instance_name
5725
    _CheckNodeOnline(self, self.instance.primary_node)
5726

    
5727
  def Exec(self, feedback_fn):
5728
    """Activate the disks.
5729

5730
    """
5731
    disks_ok, disks_info = \
5732
              _AssembleInstanceDisks(self, self.instance,
5733
                                     ignore_size=self.op.ignore_size)
5734
    if not disks_ok:
5735
      raise errors.OpExecError("Cannot activate block devices")
5736

    
5737
    return disks_info
5738

    
5739

    
5740
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5741
                           ignore_size=False):
5742
  """Prepare the block devices for an instance.
5743

5744
  This sets up the block devices on all nodes.
5745

5746
  @type lu: L{LogicalUnit}
5747
  @param lu: the logical unit on whose behalf we execute
5748
  @type instance: L{objects.Instance}
5749
  @param instance: the instance for whose disks we assemble
5750
  @type disks: list of L{objects.Disk} or None
5751
  @param disks: which disks to assemble (or all, if None)
5752
  @type ignore_secondaries: boolean
5753
  @param ignore_secondaries: if true, errors on secondary nodes
5754
      won't result in an error return from the function
5755
  @type ignore_size: boolean
5756
  @param ignore_size: if true, the current known size of the disk
5757
      will not be used during the disk activation, useful for cases
5758
      when the size is wrong
5759
  @return: a tuple of (disks_ok, device_info); device_info is a list of
      (node_name, instance_disk_name, node_device_path) tuples
      with the mapping from node devices to instance devices
5762

5763
  """
5764
  device_info = []
5765
  disks_ok = True
5766
  iname = instance.name
5767
  disks = _ExpandCheckDisks(instance, disks)
5768

    
5769
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking has occurred, but we do not eliminate it
5772

    
5773
  # The proper fix would be to wait (with some limits) until the
5774
  # connection has been made and drbd transitions from WFConnection
5775
  # into any other network-connected state (Connected, SyncTarget,
5776
  # SyncSource, etc.)
5777

    
5778
  # 1st pass, assemble on all nodes in secondary mode
5779
  for idx, inst_disk in enumerate(disks):
5780
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5781
      if ignore_size:
5782
        node_disk = node_disk.Copy()
5783
        node_disk.UnsetSize()
5784
      lu.cfg.SetDiskID(node_disk, node)
5785
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5786
      msg = result.fail_msg
5787
      if msg:
5788
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5789
                           " (is_primary=False, pass=1): %s",
5790
                           inst_disk.iv_name, node, msg)
5791
        if not ignore_secondaries:
5792
          disks_ok = False
5793

    
5794
  # FIXME: race condition on drbd migration to primary
5795

    
5796
  # 2nd pass, do only the primary node
5797
  for idx, inst_disk in enumerate(disks):
5798
    dev_path = None
5799

    
5800
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5801
      if node != instance.primary_node:
5802
        continue
5803
      if ignore_size:
5804
        node_disk = node_disk.Copy()
5805
        node_disk.UnsetSize()
5806
      lu.cfg.SetDiskID(node_disk, node)
5807
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5808
      msg = result.fail_msg
5809
      if msg:
5810
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5811
                           " (is_primary=True, pass=2): %s",
5812
                           inst_disk.iv_name, node, msg)
5813
        disks_ok = False
5814
      else:
5815
        dev_path = result.payload
5816

    
5817
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5818

    
5819
  # leave the disks configured for the primary node
5820
  # this is a workaround that would be fixed better by
5821
  # improving the logical/physical id handling
5822
  for disk in disks:
5823
    lu.cfg.SetDiskID(disk, instance.primary_node)
5824

    
5825
  return disks_ok, device_info
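# A minimal usage sketch for _AssembleInstanceDisks (illustrative only; it
# assumes "lu" is a LogicalUnit with initialized cfg/rpc and "instance" is a
# locked objects.Instance):
#
#   disks_ok, dev_info = _AssembleInstanceDisks(lu, instance, ignore_size=True)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in dev_info:
#     logging.info("Disk %s of %s is visible on %s as %s",
#                  iv_name, instance.name, node, dev_path)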
5826

    
5827

    
5828
def _StartInstanceDisks(lu, instance, force):
5829
  """Start the disks of an instance.
5830

5831
  """
5832
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5833
                                           ignore_secondaries=force)
5834
  if not disks_ok:
5835
    _ShutdownInstanceDisks(lu, instance)
5836
    if force is not None and not force:
5837
      lu.proc.LogWarning("", hint="If the message above refers to a"
5838
                         " secondary node,"
5839
                         " you can retry the operation using '--force'.")
5840
    raise errors.OpExecError("Disk consistency error")
5841

    
5842

    
5843
class LUInstanceDeactivateDisks(NoHooksLU):
5844
  """Shutdown an instance's disks.
5845

5846
  """
5847
  REQ_BGL = False
5848

    
5849
  def ExpandNames(self):
5850
    self._ExpandAndLockInstance()
5851
    self.needed_locks[locking.LEVEL_NODE] = []
5852
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5853

    
5854
  def DeclareLocks(self, level):
5855
    if level == locking.LEVEL_NODE:
5856
      self._LockInstancesNodes()
5857

    
5858
  def CheckPrereq(self):
5859
    """Check prerequisites.
5860

5861
    This checks that the instance is in the cluster.
5862

5863
    """
5864
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5865
    assert self.instance is not None, \
5866
      "Cannot retrieve locked instance %s" % self.op.instance_name
5867

    
5868
  def Exec(self, feedback_fn):
5869
    """Deactivate the disks
5870

5871
    """
5872
    instance = self.instance
5873
    if self.op.force:
5874
      _ShutdownInstanceDisks(self, instance)
5875
    else:
5876
      _SafeShutdownInstanceDisks(self, instance)
5877

    
5878

    
5879
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5880
  """Shutdown block devices of an instance.
5881

5882
  This function checks if an instance is running, before calling
5883
  _ShutdownInstanceDisks.
5884

5885
  """
5886
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5887
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5888

    
5889

    
5890
def _ExpandCheckDisks(instance, disks):
5891
  """Return the instance disks selected by the disks list
5892

5893
  @type disks: list of L{objects.Disk} or None
5894
  @param disks: selected disks
5895
  @rtype: list of L{objects.Disk}
5896
  @return: selected instance disks to act on
5897

5898
  """
5899
  if disks is None:
5900
    return instance.disks
5901
  else:
5902
    if not set(disks).issubset(instance.disks):
5903
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5904
                                   " target instance")
5905
    return disks
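# Usage sketch for _ExpandCheckDisks (illustrative only): passing None selects
# every disk of the instance, while passing a subset must consist of objects
# taken from instance.disks, otherwise a ProgrammerError is raised:
#
#   all_disks = _ExpandCheckDisks(instance, None)
#   first_only = _ExpandCheckDisks(instance, instance.disks[:1])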
5906

    
5907

    
5908
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5909
  """Shutdown block devices of an instance.
5910

5911
  This does the shutdown on all nodes of the instance.
5912

5913
  If ignore_primary is true, errors on the primary node are ignored;
  otherwise such errors cause the function to report failure.
5915

5916
  """
5917
  all_result = True
5918
  disks = _ExpandCheckDisks(instance, disks)
5919

    
5920
  for disk in disks:
5921
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5922
      lu.cfg.SetDiskID(top_disk, node)
5923
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5924
      msg = result.fail_msg
5925
      if msg:
5926
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5927
                      disk.iv_name, node, msg)
5928
        if ((node == instance.primary_node and not ignore_primary) or
5929
            (node != instance.primary_node and not result.offline)):
5930
          all_result = False
5931
  return all_result
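# Usage sketch (illustrative only), mirroring LUInstanceDeactivateDisks.Exec
# above: force a shutdown regardless of instance state, or go through the
# checked variant that refuses to act on a running instance:
#
#   if force:
#     _ShutdownInstanceDisks(lu, instance)
#   else:
#     _SafeShutdownInstanceDisks(lu, instance)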
5932

    
5933

    
5934
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5935
  """Checks if a node has enough free memory.
5936

5937
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5941

5942
  @type lu: C{LogicalUnit}
5943
  @param lu: a logical unit from which we get configuration data
5944
  @type node: C{str}
5945
  @param node: the node to check
5946
  @type reason: C{str}
5947
  @param reason: string to use in the error message
5948
  @type requested: C{int}
5949
  @param requested: the amount of memory in MiB to check for
5950
  @type hypervisor_name: C{str}
5951
  @param hypervisor_name: the hypervisor to ask for memory stats
5952
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5953
      we cannot check the node
5954

5955
  """
5956
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5957
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5958
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5959
  free_mem = nodeinfo[node].payload.get("memory_free", None)
5960
  if not isinstance(free_mem, int):
5961
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5962
                               " was '%s'" % (node, free_mem),
5963
                               errors.ECODE_ENVIRON)
5964
  if requested > free_mem:
5965
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5966
                               " needed %s MiB, available %s MiB" %
5967
                               (node, reason, requested, free_mem),
5968
                               errors.ECODE_NORES)
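# Usage sketch for _CheckNodeFreeMemory (illustrative only; the 2048 MiB
# figure is an arbitrary example, not a value taken from this module):
#
#   _CheckNodeFreeMemory(lu, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        2048, instance.hypervisor)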
5969

    
5970

    
5971
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5972
  """Checks if nodes have enough free disk space in the all VGs.
5973

5974
  This function check if all given nodes have the needed amount of
5975
  free disk. In case any node has less disk or we cannot get the
5976
  information from the node, this function raise an OpPrereqError
5977
  exception.
5978

5979
  @type lu: C{LogicalUnit}
5980
  @param lu: a logical unit from which we get configuration data
5981
  @type nodenames: C{list}
5982
  @param nodenames: the list of node names to check
5983
  @type req_sizes: C{dict}
5984
  @param req_sizes: the hash of vg and corresponding amount of disk in
5985
      MiB to check for
5986
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5987
      or we cannot check the node
5988

5989
  """
5990
  for vg, req_size in req_sizes.items():
5991
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
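# Usage sketch (illustrative only; the node and volume group names and the
# sizes below are hypothetical): req_sizes maps each VG name to the amount of
# space, in MiB, that must be free on every node in nodenames:
#
#   _CheckNodesFreeDiskPerVG(lu, ["node1.example.com", "node2.example.com"],
#                            {"xenvg": 10240, "datavg": 2048})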
5992

    
5993

    
5994
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5995
  """Checks if nodes have enough free disk space in the specified VG.
5996

5997
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6001

6002
  @type lu: C{LogicalUnit}
6003
  @param lu: a logical unit from which we get configuration data
6004
  @type nodenames: C{list}
6005
  @param nodenames: the list of node names to check
6006
  @type vg: C{str}
6007
  @param vg: the volume group to check
6008
  @type requested: C{int}
6009
  @param requested: the amount of disk in MiB to check for
6010
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
6011
      or we cannot check the node
6012

6013
  """
6014
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
6015
  for node in nodenames:
6016
    info = nodeinfo[node]
6017
    info.Raise("Cannot get current information from node %s" % node,
6018
               prereq=True, ecode=errors.ECODE_ENVIRON)
6019
    vg_free = info.payload.get("vg_free", None)
6020
    if not isinstance(vg_free, int):
6021
      raise errors.OpPrereqError("Can't compute free disk space on node"
6022
                                 " %s for vg %s, result was '%s'" %
6023
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
6024
    if requested > vg_free:
6025
      raise errors.OpPrereqError("Not enough disk space on target node %s"
6026
                                 " vg %s: required %d MiB, available %d MiB" %
6027
                                 (node, vg, requested, vg_free),
6028
                                 errors.ECODE_NORES)
6029

    
6030

    
6031
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6032
  """Checks if nodes have enough physical CPUs
6033

6034
  This function checks if all given nodes have the needed number of
6035
  physical CPUs. In case any node has less CPUs or we cannot get the
6036
  information from the node, this function raises an OpPrereqError
6037
  exception.
6038

6039
  @type lu: C{LogicalUnit}
6040
  @param lu: a logical unit from which we get configuration data
6041
  @type nodenames: C{list}
6042
  @param nodenames: the list of node names to check
6043
  @type requested: C{int}
6044
  @param requested: the minimum acceptable number of physical CPUs
6045
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6046
      or we cannot check the node
6047

6048
  """
6049
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
6050
  for node in nodenames:
6051
    info = nodeinfo[node]
6052
    info.Raise("Cannot get current information from node %s" % node,
6053
               prereq=True, ecode=errors.ECODE_ENVIRON)
6054
    num_cpus = info.payload.get("cpu_total", None)
6055
    if not isinstance(num_cpus, int):
6056
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6057
                                 " on node %s, result was '%s'" %
6058
                                 (node, num_cpus), errors.ECODE_ENVIRON)
6059
    if requested > num_cpus:
6060
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6061
                                 "required" % (node, num_cpus, requested),
6062
                                 errors.ECODE_NORES)
6063

    
6064

    
6065
class LUInstanceStartup(LogicalUnit):
6066
  """Starts an instance.
6067

6068
  """
6069
  HPATH = "instance-start"
6070
  HTYPE = constants.HTYPE_INSTANCE
6071
  REQ_BGL = False
6072

    
6073
  def CheckArguments(self):
6074
    # extra beparams
6075
    if self.op.beparams:
6076
      # fill the beparams dict
6077
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6078

    
6079
  def ExpandNames(self):
6080
    self._ExpandAndLockInstance()
6081

    
6082
  def BuildHooksEnv(self):
6083
    """Build hooks env.
6084

6085
    This runs on master, primary and secondary nodes of the instance.
6086

6087
    """
6088
    env = {
6089
      "FORCE": self.op.force,
6090
      }
6091

    
6092
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6093

    
6094
    return env
6095

    
6096
  def BuildHooksNodes(self):
6097
    """Build hooks nodes.
6098

6099
    """
6100
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6101
    return (nl, nl)
6102

    
6103
  def CheckPrereq(self):
6104
    """Check prerequisites.
6105

6106
    This checks that the instance is in the cluster.
6107

6108
    """
6109
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6110
    assert self.instance is not None, \
6111
      "Cannot retrieve locked instance %s" % self.op.instance_name
6112

    
6113
    # extra hvparams
6114
    if self.op.hvparams:
6115
      # check hypervisor parameter syntax (locally)
6116
      cluster = self.cfg.GetClusterInfo()
6117
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6118
      filled_hvp = cluster.FillHV(instance)
6119
      filled_hvp.update(self.op.hvparams)
6120
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6121
      hv_type.CheckParameterSyntax(filled_hvp)
6122
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6123

    
6124
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6125

    
6126
    if self.primary_offline and self.op.ignore_offline_nodes:
6127
      self.proc.LogWarning("Ignoring offline primary node")
6128

    
6129
      if self.op.hvparams or self.op.beparams:
6130
        self.proc.LogWarning("Overridden parameters are ignored")
6131
    else:
6132
      _CheckNodeOnline(self, instance.primary_node)
6133

    
6134
      bep = self.cfg.GetClusterInfo().FillBE(instance)
6135

    
6136
      # check bridges existence
6137
      _CheckInstanceBridgesExist(self, instance)
6138

    
6139
      remote_info = self.rpc.call_instance_info(instance.primary_node,
6140
                                                instance.name,
6141
                                                instance.hypervisor)
6142
      remote_info.Raise("Error checking node %s" % instance.primary_node,
6143
                        prereq=True, ecode=errors.ECODE_ENVIRON)
6144
      if not remote_info.payload: # not running already
6145
        _CheckNodeFreeMemory(self, instance.primary_node,
6146
                             "starting instance %s" % instance.name,
6147
                             bep[constants.BE_MEMORY], instance.hypervisor)
6148

    
6149
  def Exec(self, feedback_fn):
6150
    """Start the instance.
6151

6152
    """
6153
    instance = self.instance
6154
    force = self.op.force
6155

    
6156
    if not self.op.no_remember:
6157
      self.cfg.MarkInstanceUp(instance.name)
6158

    
6159
    if self.primary_offline:
6160
      assert self.op.ignore_offline_nodes
6161
      self.proc.LogInfo("Primary node offline, marked instance as started")
6162
    else:
6163
      node_current = instance.primary_node
6164

    
6165
      _StartInstanceDisks(self, instance, force)
6166

    
6167
      result = \
6168
        self.rpc.call_instance_start(node_current,
6169
                                     (instance, self.op.hvparams,
6170
                                      self.op.beparams),
6171
                                     self.op.startup_paused)
6172
      msg = result.fail_msg
6173
      if msg:
6174
        _ShutdownInstanceDisks(self, instance)
6175
        raise errors.OpExecError("Could not start instance: %s" % msg)
6176

    
6177

    
6178
class LUInstanceReboot(LogicalUnit):
6179
  """Reboot an instance.
6180

6181
  """
6182
  HPATH = "instance-reboot"
6183
  HTYPE = constants.HTYPE_INSTANCE
6184
  REQ_BGL = False
6185

    
6186
  def ExpandNames(self):
6187
    self._ExpandAndLockInstance()
6188

    
6189
  def BuildHooksEnv(self):
6190
    """Build hooks env.
6191

6192
    This runs on master, primary and secondary nodes of the instance.
6193

6194
    """
6195
    env = {
6196
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6197
      "REBOOT_TYPE": self.op.reboot_type,
6198
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6199
      }
6200

    
6201
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6202

    
6203
    return env
6204

    
6205
  def BuildHooksNodes(self):
6206
    """Build hooks nodes.
6207

6208
    """
6209
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6210
    return (nl, nl)
6211

    
6212
  def CheckPrereq(self):
6213
    """Check prerequisites.
6214

6215
    This checks that the instance is in the cluster.
6216

6217
    """
6218
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6219
    assert self.instance is not None, \
6220
      "Cannot retrieve locked instance %s" % self.op.instance_name
6221

    
6222
    _CheckNodeOnline(self, instance.primary_node)
6223

    
6224
    # check bridges existence
6225
    _CheckInstanceBridgesExist(self, instance)
6226

    
6227
  def Exec(self, feedback_fn):
6228
    """Reboot the instance.
6229

6230
    """
6231
    instance = self.instance
6232
    ignore_secondaries = self.op.ignore_secondaries
6233
    reboot_type = self.op.reboot_type
6234

    
6235
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6236
                                              instance.name,
6237
                                              instance.hypervisor)
6238
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6239
    instance_running = bool(remote_info.payload)
6240

    
6241
    node_current = instance.primary_node
6242

    
6243
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6244
                                            constants.INSTANCE_REBOOT_HARD]:
6245
      for disk in instance.disks:
6246
        self.cfg.SetDiskID(disk, node_current)
6247
      result = self.rpc.call_instance_reboot(node_current, instance,
6248
                                             reboot_type,
6249
                                             self.op.shutdown_timeout)
6250
      result.Raise("Could not reboot instance")
6251
    else:
6252
      if instance_running:
6253
        result = self.rpc.call_instance_shutdown(node_current, instance,
6254
                                                 self.op.shutdown_timeout)
6255
        result.Raise("Could not shutdown instance for full reboot")
6256
        _ShutdownInstanceDisks(self, instance)
6257
      else:
6258
        self.LogInfo("Instance %s was already stopped, starting now",
6259
                     instance.name)
6260
      _StartInstanceDisks(self, instance, ignore_secondaries)
6261
      result = self.rpc.call_instance_start(node_current,
6262
                                            (instance, None, None), False)
6263
      msg = result.fail_msg
6264
      if msg:
6265
        _ShutdownInstanceDisks(self, instance)
6266
        raise errors.OpExecError("Could not start instance for"
6267
                                 " full reboot: %s" % msg)
6268

    
6269
    self.cfg.MarkInstanceUp(instance.name)
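    # Summary of the branches above (descriptive only):
    #
    #   instance running, reboot_type soft/hard -> in-place reboot via
    #                                              call_instance_reboot
    #   instance running, reboot_type full      -> shutdown, stop disks,
    #                                              start disks, start instance
    #   instance not running (any reboot type)  -> start disks, start instance
    #
    # In every case the instance is marked as up in the configuration at the
    # end.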
6270

    
6271

    
6272
class LUInstanceShutdown(LogicalUnit):
6273
  """Shutdown an instance.
6274

6275
  """
6276
  HPATH = "instance-stop"
6277
  HTYPE = constants.HTYPE_INSTANCE
6278
  REQ_BGL = False
6279

    
6280
  def ExpandNames(self):
6281
    self._ExpandAndLockInstance()
6282

    
6283
  def BuildHooksEnv(self):
6284
    """Build hooks env.
6285

6286
    This runs on master, primary and secondary nodes of the instance.
6287

6288
    """
6289
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6290
    env["TIMEOUT"] = self.op.timeout
6291
    return env
6292

    
6293
  def BuildHooksNodes(self):
6294
    """Build hooks nodes.
6295

6296
    """
6297
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6298
    return (nl, nl)
6299

    
6300
  def CheckPrereq(self):
6301
    """Check prerequisites.
6302

6303
    This checks that the instance is in the cluster.
6304

6305
    """
6306
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6307
    assert self.instance is not None, \
6308
      "Cannot retrieve locked instance %s" % self.op.instance_name
6309

    
6310
    self.primary_offline = \
6311
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6312

    
6313
    if self.primary_offline and self.op.ignore_offline_nodes:
6314
      self.proc.LogWarning("Ignoring offline primary node")
6315
    else:
6316
      _CheckNodeOnline(self, self.instance.primary_node)
6317

    
6318
  def Exec(self, feedback_fn):
6319
    """Shutdown the instance.
6320

6321
    """
6322
    instance = self.instance
6323
    node_current = instance.primary_node
6324
    timeout = self.op.timeout
6325

    
6326
    if not self.op.no_remember:
6327
      self.cfg.MarkInstanceDown(instance.name)
6328

    
6329
    if self.primary_offline:
6330
      assert self.op.ignore_offline_nodes
6331
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6332
    else:
6333
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6334
      msg = result.fail_msg
6335
      if msg:
6336
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6337

    
6338
      _ShutdownInstanceDisks(self, instance)
6339

    
6340

    
6341
class LUInstanceReinstall(LogicalUnit):
6342
  """Reinstall an instance.
6343

6344
  """
6345
  HPATH = "instance-reinstall"
6346
  HTYPE = constants.HTYPE_INSTANCE
6347
  REQ_BGL = False
6348

    
6349
  def ExpandNames(self):
6350
    self._ExpandAndLockInstance()
6351

    
6352
  def BuildHooksEnv(self):
6353
    """Build hooks env.
6354

6355
    This runs on master, primary and secondary nodes of the instance.
6356

6357
    """
6358
    return _BuildInstanceHookEnvByObject(self, self.instance)
6359

    
6360
  def BuildHooksNodes(self):
6361
    """Build hooks nodes.
6362

6363
    """
6364
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6365
    return (nl, nl)
6366

    
6367
  def CheckPrereq(self):
6368
    """Check prerequisites.
6369

6370
    This checks that the instance is in the cluster and is not running.
6371

6372
    """
6373
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6374
    assert instance is not None, \
6375
      "Cannot retrieve locked instance %s" % self.op.instance_name
6376
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6377
                     " offline, cannot reinstall")
6378
    for node in instance.secondary_nodes:
6379
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6380
                       " cannot reinstall")
6381

    
6382
    if instance.disk_template == constants.DT_DISKLESS:
6383
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6384
                                 self.op.instance_name,
6385
                                 errors.ECODE_INVAL)
6386
    _CheckInstanceDown(self, instance, "cannot reinstall")
6387

    
6388
    if self.op.os_type is not None:
6389
      # OS verification
6390
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6391
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6392
      instance_os = self.op.os_type
6393
    else:
6394
      instance_os = instance.os
6395

    
6396
    nodelist = list(instance.all_nodes)
6397

    
6398
    if self.op.osparams:
6399
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6400
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6401
      self.os_inst = i_osdict # the new dict (without defaults)
6402
    else:
6403
      self.os_inst = None
6404

    
6405
    self.instance = instance
6406

    
6407
  def Exec(self, feedback_fn):
6408
    """Reinstall the instance.
6409

6410
    """
6411
    inst = self.instance
6412

    
6413
    if self.op.os_type is not None:
6414
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6415
      inst.os = self.op.os_type
6416
      # Write to configuration
6417
      self.cfg.Update(inst, feedback_fn)
6418

    
6419
    _StartInstanceDisks(self, inst, None)
6420
    try:
6421
      feedback_fn("Running the instance OS create scripts...")
6422
      # FIXME: pass debug option from opcode to backend
6423
      result = self.rpc.call_instance_os_add(inst.primary_node,
6424
                                             (inst, self.os_inst), True,
6425
                                             self.op.debug_level)
6426
      result.Raise("Could not install OS for instance %s on node %s" %
6427
                   (inst.name, inst.primary_node))
6428
    finally:
6429
      _ShutdownInstanceDisks(self, inst)
6430

    
6431

    
6432
class LUInstanceRecreateDisks(LogicalUnit):
6433
  """Recreate an instance's missing disks.
6434

6435
  """
6436
  HPATH = "instance-recreate-disks"
6437
  HTYPE = constants.HTYPE_INSTANCE
6438
  REQ_BGL = False
6439

    
6440
  def CheckArguments(self):
6441
    # normalise the disk list
6442
    self.op.disks = sorted(frozenset(self.op.disks))
6443

    
6444
  def ExpandNames(self):
6445
    self._ExpandAndLockInstance()
6446
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6447
    if self.op.nodes:
6448
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6449
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6450
    else:
6451
      self.needed_locks[locking.LEVEL_NODE] = []
6452

    
6453
  def DeclareLocks(self, level):
6454
    if level == locking.LEVEL_NODE:
6455
      # if we replace the nodes, we only need to lock the old primary,
6456
      # otherwise we need to lock all nodes for disk re-creation
6457
      primary_only = bool(self.op.nodes)
6458
      self._LockInstancesNodes(primary_only=primary_only)
6459

    
6460
  def BuildHooksEnv(self):
6461
    """Build hooks env.
6462

6463
    This runs on master, primary and secondary nodes of the instance.
6464

6465
    """
6466
    return _BuildInstanceHookEnvByObject(self, self.instance)
6467

    
6468
  def BuildHooksNodes(self):
6469
    """Build hooks nodes.
6470

6471
    """
6472
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6473
    return (nl, nl)
6474

    
6475
  def CheckPrereq(self):
6476
    """Check prerequisites.
6477

6478
    This checks that the instance is in the cluster and is not running.
6479

6480
    """
6481
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6482
    assert instance is not None, \
6483
      "Cannot retrieve locked instance %s" % self.op.instance_name
6484
    if self.op.nodes:
6485
      if len(self.op.nodes) != len(instance.all_nodes):
6486
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6487
                                   " %d replacement nodes were specified" %
6488
                                   (instance.name, len(instance.all_nodes),
6489
                                    len(self.op.nodes)),
6490
                                   errors.ECODE_INVAL)
6491
      assert instance.disk_template != constants.DT_DRBD8 or \
6492
          len(self.op.nodes) == 2
6493
      assert instance.disk_template != constants.DT_PLAIN or \
6494
          len(self.op.nodes) == 1
6495
      primary_node = self.op.nodes[0]
6496
    else:
6497
      primary_node = instance.primary_node
6498
    _CheckNodeOnline(self, primary_node)
6499

    
6500
    if instance.disk_template == constants.DT_DISKLESS:
6501
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6502
                                 self.op.instance_name, errors.ECODE_INVAL)
6503
    # if we replace nodes *and* the old primary is offline, we don't
6504
    # check
6505
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6506
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6507
    if not (self.op.nodes and old_pnode.offline):
6508
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6509

    
6510
    if not self.op.disks:
6511
      self.op.disks = range(len(instance.disks))
6512
    else:
6513
      for idx in self.op.disks:
6514
        if idx >= len(instance.disks):
6515
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6516
                                     errors.ECODE_INVAL)
6517
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6518
      raise errors.OpPrereqError("Can't recreate disks partially and"
6519
                                 " change the nodes at the same time",
6520
                                 errors.ECODE_INVAL)
6521
    self.instance = instance
6522

    
6523
  def Exec(self, feedback_fn):
6524
    """Recreate the disks.
6525

6526
    """
6527
    instance = self.instance
6528

    
6529
    to_skip = []
6530
    mods = [] # keeps track of needed logical_id changes
6531

    
6532
    for idx, disk in enumerate(instance.disks):
6533
      if idx not in self.op.disks: # disk idx has not been passed in
6534
        to_skip.append(idx)
6535
        continue
6536
      # update secondaries for disks, if needed
6537
      if self.op.nodes:
6538
        if disk.dev_type == constants.LD_DRBD8:
6539
          # need to update the nodes and minors
6540
          assert len(self.op.nodes) == 2
6541
          assert len(disk.logical_id) == 6 # otherwise disk internals
6542
                                           # have changed
6543
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6544
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6545
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6546
                    new_minors[0], new_minors[1], old_secret)
6547
          assert len(disk.logical_id) == len(new_id)
6548
          mods.append((idx, new_id))
6549

    
6550
    # now that we have passed all asserts above, we can apply the mods
6551
    # in a single run (to avoid partial changes)
6552
    for idx, new_id in mods:
6553
      instance.disks[idx].logical_id = new_id
6554

    
6555
    # change primary node, if needed
6556
    if self.op.nodes:
6557
      instance.primary_node = self.op.nodes[0]
6558
      self.LogWarning("Changing the instance's nodes, you will have to"
6559
                      " remove any disks left on the older nodes manually")
6560

    
6561
    if self.op.nodes:
6562
      self.cfg.Update(instance, feedback_fn)
6563

    
6564
    _CreateDisks(self, instance, to_skip=to_skip)
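    # Descriptive note on the DRBD8 logical_id handling above: the 6-tuple is
    # laid out as (node_a, node_b, port, minor_a, minor_b, secret). When the
    # disks are re-created on new nodes, only the node names and the freshly
    # allocated minors change; the TCP port and the shared secret are reused,
    # e.g. (hypothetical values):
    #
    #   ("node3.example.com", "node4.example.com", 11000, 0, 1, "abcdef")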
6565

    
6566

    
6567
class LUInstanceRename(LogicalUnit):
6568
  """Rename an instance.
6569

6570
  """
6571
  HPATH = "instance-rename"
6572
  HTYPE = constants.HTYPE_INSTANCE
6573

    
6574
  def CheckArguments(self):
6575
    """Check arguments.
6576

6577
    """
6578
    if self.op.ip_check and not self.op.name_check:
6579
      # TODO: make the ip check more flexible and not depend on the name check
6580
      raise errors.OpPrereqError("IP address check requires a name check",
6581
                                 errors.ECODE_INVAL)
6582

    
6583
  def BuildHooksEnv(self):
6584
    """Build hooks env.
6585

6586
    This runs on master, primary and secondary nodes of the instance.
6587

6588
    """
6589
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6590
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6591
    return env
6592

    
6593
  def BuildHooksNodes(self):
6594
    """Build hooks nodes.
6595

6596
    """
6597
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6598
    return (nl, nl)
6599

    
6600
  def CheckPrereq(self):
6601
    """Check prerequisites.
6602

6603
    This checks that the instance is in the cluster and is not running.
6604

6605
    """
6606
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6607
                                                self.op.instance_name)
6608
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6609
    assert instance is not None
6610
    _CheckNodeOnline(self, instance.primary_node)
6611
    _CheckInstanceDown(self, instance, "cannot rename")
6612
    self.instance = instance
6613

    
6614
    new_name = self.op.new_name
6615
    if self.op.name_check:
6616
      hostname = netutils.GetHostname(name=new_name)
6617
      if hostname != new_name:
6618
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6619
                     hostname.name)
6620
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6621
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6622
                                    " same as given hostname '%s'") %
6623
                                    (hostname.name, self.op.new_name),
6624
                                    errors.ECODE_INVAL)
6625
      new_name = self.op.new_name = hostname.name
6626
      if (self.op.ip_check and
6627
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6628
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6629
                                   (hostname.ip, new_name),
6630
                                   errors.ECODE_NOTUNIQUE)
6631

    
6632
    instance_list = self.cfg.GetInstanceList()
6633
    if new_name in instance_list and new_name != instance.name:
6634
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6635
                                 new_name, errors.ECODE_EXISTS)
6636

    
6637
  def Exec(self, feedback_fn):
6638
    """Rename the instance.
6639

6640
    """
6641
    inst = self.instance
6642
    old_name = inst.name
6643

    
6644
    rename_file_storage = False
6645
    if (inst.disk_template in constants.DTS_FILEBASED and
6646
        self.op.new_name != inst.name):
6647
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6648
      rename_file_storage = True
6649

    
6650
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6651
    # Change the instance lock. This is definitely safe while we hold the BGL.
6652
    # Otherwise the new lock would have to be added in acquired mode.
6653
    assert self.REQ_BGL
6654
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6655
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6656

    
6657
    # re-read the instance from the configuration after rename
6658
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6659

    
6660
    if rename_file_storage:
6661
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6662
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6663
                                                     old_file_storage_dir,
6664
                                                     new_file_storage_dir)
6665
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6666
                   " (but the instance has been renamed in Ganeti)" %
6667
                   (inst.primary_node, old_file_storage_dir,
6668
                    new_file_storage_dir))
6669

    
6670
    _StartInstanceDisks(self, inst, None)
6671
    try:
6672
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6673
                                                 old_name, self.op.debug_level)
6674
      msg = result.fail_msg
6675
      if msg:
6676
        msg = ("Could not run OS rename script for instance %s on node %s"
6677
               " (but the instance has been renamed in Ganeti): %s" %
6678
               (inst.name, inst.primary_node, msg))
6679
        self.proc.LogWarning(msg)
6680
    finally:
6681
      _ShutdownInstanceDisks(self, inst)
6682

    
6683
    return inst.name
6684

    
6685

    
6686
class LUInstanceRemove(LogicalUnit):
6687
  """Remove an instance.
6688

6689
  """
6690
  HPATH = "instance-remove"
6691
  HTYPE = constants.HTYPE_INSTANCE
6692
  REQ_BGL = False
6693

    
6694
  def ExpandNames(self):
6695
    self._ExpandAndLockInstance()
6696
    self.needed_locks[locking.LEVEL_NODE] = []
6697
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6698

    
6699
  def DeclareLocks(self, level):
6700
    if level == locking.LEVEL_NODE:
6701
      self._LockInstancesNodes()
6702

    
6703
  def BuildHooksEnv(self):
6704
    """Build hooks env.
6705

6706
    This runs on master, primary and secondary nodes of the instance.
6707

6708
    """
6709
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6710
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6711
    return env
6712

    
6713
  def BuildHooksNodes(self):
6714
    """Build hooks nodes.
6715

6716
    """
6717
    nl = [self.cfg.GetMasterNode()]
6718
    nl_post = list(self.instance.all_nodes) + nl
6719
    return (nl, nl_post)
6720

    
6721
  def CheckPrereq(self):
6722
    """Check prerequisites.
6723

6724
    This checks that the instance is in the cluster.
6725

6726
    """
6727
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6728
    assert self.instance is not None, \
6729
      "Cannot retrieve locked instance %s" % self.op.instance_name
6730

    
6731
  def Exec(self, feedback_fn):
6732
    """Remove the instance.
6733

6734
    """
6735
    instance = self.instance
6736
    logging.info("Shutting down instance %s on node %s",
6737
                 instance.name, instance.primary_node)
6738

    
6739
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6740
                                             self.op.shutdown_timeout)
6741
    msg = result.fail_msg
6742
    if msg:
6743
      if self.op.ignore_failures:
6744
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6745
      else:
6746
        raise errors.OpExecError("Could not shutdown instance %s on"
6747
                                 " node %s: %s" %
6748
                                 (instance.name, instance.primary_node, msg))
6749

    
6750
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6751

    
6752

    
6753
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6754
  """Utility function to remove an instance.
6755

6756
  """
6757
  logging.info("Removing block devices for instance %s", instance.name)
6758

    
6759
  if not _RemoveDisks(lu, instance):
6760
    if not ignore_failures:
6761
      raise errors.OpExecError("Can't remove instance's disks")
6762
    feedback_fn("Warning: can't remove instance's disks")
6763

    
6764
  logging.info("Removing instance %s out of cluster config", instance.name)
6765

    
6766
  lu.cfg.RemoveInstance(instance.name)
6767

    
6768
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6769
    "Instance lock removal conflict"
6770

    
6771
  # Remove lock for the instance
6772
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6773

    
6774

    
6775
class LUInstanceQuery(NoHooksLU):
6776
  """Logical unit for querying instances.
6777

6778
  """
6779
  # pylint: disable=W0142
6780
  REQ_BGL = False
6781

    
6782
  def CheckArguments(self):
6783
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6784
                             self.op.output_fields, self.op.use_locking)
6785

    
6786
  def ExpandNames(self):
6787
    self.iq.ExpandNames(self)
6788

    
6789
  def DeclareLocks(self, level):
6790
    self.iq.DeclareLocks(self, level)
6791

    
6792
  def Exec(self, feedback_fn):
6793
    return self.iq.OldStyleQuery(self)
6794

    
6795

    
6796
class LUInstanceFailover(LogicalUnit):
6797
  """Failover an instance.
6798

6799
  """
6800
  HPATH = "instance-failover"
6801
  HTYPE = constants.HTYPE_INSTANCE
6802
  REQ_BGL = False
6803

    
6804
  def CheckArguments(self):
6805
    """Check the arguments.
6806

6807
    """
6808
    self.iallocator = getattr(self.op, "iallocator", None)
6809
    self.target_node = getattr(self.op, "target_node", None)
6810

    
6811
  def ExpandNames(self):
6812
    self._ExpandAndLockInstance()
6813

    
6814
    if self.op.target_node is not None:
6815
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6816

    
6817
    self.needed_locks[locking.LEVEL_NODE] = []
6818
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6819

    
6820
    ignore_consistency = self.op.ignore_consistency
6821
    shutdown_timeout = self.op.shutdown_timeout
6822
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6823
                                       cleanup=False,
6824
                                       failover=True,
6825
                                       ignore_consistency=ignore_consistency,
6826
                                       shutdown_timeout=shutdown_timeout)
6827
    self.tasklets = [self._migrater]
6828

    
6829
  def DeclareLocks(self, level):
6830
    if level == locking.LEVEL_NODE:
6831
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6832
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6833
        if self.op.target_node is None:
6834
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6835
        else:
6836
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6837
                                                   self.op.target_node]
6838
        del self.recalculate_locks[locking.LEVEL_NODE]
6839
      else:
6840
        self._LockInstancesNodes()
6841

    
6842
  def BuildHooksEnv(self):
6843
    """Build hooks env.
6844

6845
    This runs on master, primary and secondary nodes of the instance.
6846

6847
    """
6848
    instance = self._migrater.instance
6849
    source_node = instance.primary_node
6850
    target_node = self.op.target_node
6851
    env = {
6852
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6853
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6854
      "OLD_PRIMARY": source_node,
6855
      "NEW_PRIMARY": target_node,
6856
      }
6857

    
6858
    if instance.disk_template in constants.DTS_INT_MIRROR:
6859
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6860
      env["NEW_SECONDARY"] = source_node
6861
    else:
6862
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6863

    
6864
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6865

    
6866
    return env
6867

    
6868
  def BuildHooksNodes(self):
6869
    """Build hooks nodes.
6870

6871
    """
6872
    instance = self._migrater.instance
6873
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6874
    return (nl, nl + [instance.primary_node])
6875

    
6876

    
6877
class LUInstanceMigrate(LogicalUnit):
6878
  """Migrate an instance.
6879

6880
  This is migration without shutting down, compared to the failover,
6881
  which is done with shutdown.
6882

6883
  """
6884
  HPATH = "instance-migrate"
6885
  HTYPE = constants.HTYPE_INSTANCE
6886
  REQ_BGL = False
6887

    
6888
  def ExpandNames(self):
6889
    self._ExpandAndLockInstance()
6890

    
6891
    if self.op.target_node is not None:
6892
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6893

    
6894
    self.needed_locks[locking.LEVEL_NODE] = []
6895
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6896

    
6897
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6898
                                       cleanup=self.op.cleanup,
6899
                                       failover=False,
6900
                                       fallback=self.op.allow_failover)
6901
    self.tasklets = [self._migrater]
6902

    
6903
  def DeclareLocks(self, level):
6904
    if level == locking.LEVEL_NODE:
6905
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6906
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6907
        if self.op.target_node is None:
6908
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6909
        else:
6910
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6911
                                                   self.op.target_node]
6912
        del self.recalculate_locks[locking.LEVEL_NODE]
6913
      else:
6914
        self._LockInstancesNodes()
6915

    
6916
  def BuildHooksEnv(self):
6917
    """Build hooks env.
6918

6919
    This runs on master, primary and secondary nodes of the instance.
6920

6921
    """
6922
    instance = self._migrater.instance
6923
    source_node = instance.primary_node
6924
    target_node = self.op.target_node
6925
    env = _BuildInstanceHookEnvByObject(self, instance)
6926
    env.update({
6927
      "MIGRATE_LIVE": self._migrater.live,
6928
      "MIGRATE_CLEANUP": self.op.cleanup,
6929
      "OLD_PRIMARY": source_node,
6930
      "NEW_PRIMARY": target_node,
6931
      })
6932

    
6933
    if instance.disk_template in constants.DTS_INT_MIRROR:
6934
      env["OLD_SECONDARY"] = target_node
6935
      env["NEW_SECONDARY"] = source_node
6936
    else:
6937
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6938

    
6939
    return env
6940

    
6941
  def BuildHooksNodes(self):
6942
    """Build hooks nodes.
6943

6944
    """
6945
    instance = self._migrater.instance
6946
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6947
    return (nl, nl + [instance.primary_node])
6948

    
6949

    
6950
class LUInstanceMove(LogicalUnit):
6951
  """Move an instance by data-copying.
6952

6953
  """
6954
  HPATH = "instance-move"
6955
  HTYPE = constants.HTYPE_INSTANCE
6956
  REQ_BGL = False
6957

    
6958
  def ExpandNames(self):
6959
    self._ExpandAndLockInstance()
6960
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6961
    self.op.target_node = target_node
6962
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6963
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6964

    
6965
  def DeclareLocks(self, level):
6966
    if level == locking.LEVEL_NODE:
6967
      self._LockInstancesNodes(primary_only=True)
6968

    
6969
  def BuildHooksEnv(self):
6970
    """Build hooks env.
6971

6972
    This runs on master, primary and secondary nodes of the instance.
6973

6974
    """
6975
    env = {
6976
      "TARGET_NODE": self.op.target_node,
6977
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6978
      }
6979
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6980
    return env
6981

    
6982
  def BuildHooksNodes(self):
6983
    """Build hooks nodes.
6984

6985
    """
6986
    nl = [
6987
      self.cfg.GetMasterNode(),
6988
      self.instance.primary_node,
6989
      self.op.target_node,
6990
      ]
6991
    return (nl, nl)
6992

    
6993
  def CheckPrereq(self):
6994
    """Check prerequisites.
6995

6996
    This checks that the instance is in the cluster.
6997

6998
    """
6999
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7000
    assert self.instance is not None, \
7001
      "Cannot retrieve locked instance %s" % self.op.instance_name
7002

    
7003
    node = self.cfg.GetNodeInfo(self.op.target_node)
7004
    assert node is not None, \
7005
      "Cannot retrieve locked node %s" % self.op.target_node
7006

    
7007
    self.target_node = target_node = node.name
7008

    
7009
    if target_node == instance.primary_node:
7010
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
7011
                                 (instance.name, target_node),
7012
                                 errors.ECODE_STATE)
7013

    
7014
    bep = self.cfg.GetClusterInfo().FillBE(instance)
7015

    
7016
    for idx, dsk in enumerate(instance.disks):
7017
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7018
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7019
                                   " cannot copy" % idx, errors.ECODE_STATE)
7020

    
7021
    _CheckNodeOnline(self, target_node)
7022
    _CheckNodeNotDrained(self, target_node)
7023
    _CheckNodeVmCapable(self, target_node)
7024

    
7025
    if instance.admin_up:
7026
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7028
                           instance.name, bep[constants.BE_MEMORY],
7029
                           instance.hypervisor)
7030
    else:
7031
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")
7033

    
7034
    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)
7036

    
7037
  def Exec(self, feedback_fn):
7038
    """Move an instance.
7039

7040
    The move is done by shutting it down on its present node, copying
7041
    the data over (slow) and starting it on the new node.
7042

7043
    """
7044
    instance = self.instance
7045

    
7046
    source_node = instance.primary_node
7047
    target_node = self.target_node
7048

    
7049
    self.LogInfo("Shutting down instance %s on source node %s",
7050
                 instance.name, source_node)
7051

    
7052
    result = self.rpc.call_instance_shutdown(source_node, instance,
7053
                                             self.op.shutdown_timeout)
7054
    msg = result.fail_msg
7055
    if msg:
7056
      if self.op.ignore_consistency:
7057
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
7058
                             " Proceeding anyway. Please make sure node"
7059
                             " %s is down. Error details: %s",
7060
                             instance.name, source_node, source_node, msg)
7061
      else:
7062
        raise errors.OpExecError("Could not shutdown instance %s on"
7063
                                 " node %s: %s" %
7064
                                 (instance.name, source_node, msg))
7065

    
7066
    # create the target disks
7067
    try:
7068
      _CreateDisks(self, instance, target_node=target_node)
7069
    except errors.OpExecError:
7070
      self.LogWarning("Device creation failed, reverting...")
7071
      try:
7072
        _RemoveDisks(self, instance, target_node=target_node)
7073
      finally:
7074
        self.cfg.ReleaseDRBDMinors(instance.name)
7075
        raise
7076

    
7077
    cluster_name = self.cfg.GetClusterInfo().cluster_name
7078

    
7079
    errs = []
7080
    # activate, get path, copy the data over
7081
    for idx, disk in enumerate(instance.disks):
7082
      self.LogInfo("Copying data for disk %d", idx)
7083
      result = self.rpc.call_blockdev_assemble(target_node, disk,
7084
                                               instance.name, True, idx)
7085
      if result.fail_msg:
7086
        self.LogWarning("Can't assemble newly created disk %d: %s",
7087
                        idx, result.fail_msg)
7088
        errs.append(result.fail_msg)
7089
        break
7090
      dev_path = result.payload
7091
      result = self.rpc.call_blockdev_export(source_node, disk,
7092
                                             target_node, dev_path,
7093
                                             cluster_name)
7094
      if result.fail_msg:
7095
        self.LogWarning("Can't copy data over for disk %d: %s",
7096
                        idx, result.fail_msg)
7097
        errs.append(result.fail_msg)
7098
        break
7099

    
7100
    if errs:
7101
      self.LogWarning("Some disks failed to copy, aborting")
7102
      try:
7103
        _RemoveDisks(self, instance, target_node=target_node)
7104
      finally:
7105
        self.cfg.ReleaseDRBDMinors(instance.name)
7106
        raise errors.OpExecError("Errors during disk copy: %s" %
7107
                                 (",".join(errs),))
7108

    
7109
    instance.primary_node = target_node
7110
    self.cfg.Update(instance, feedback_fn)
7111

    
7112
    self.LogInfo("Removing the disks on the original node")
7113
    _RemoveDisks(self, instance, target_node=source_node)
7114

    
7115
    # Only start the instance if it's marked as up
7116
    if instance.admin_up:
7117
      self.LogInfo("Starting instance %s on node %s",
7118
                   instance.name, target_node)
7119

    
7120
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7121
                                           ignore_secondaries=True)
7122
      if not disks_ok:
7123
        _ShutdownInstanceDisks(self, instance)
7124
        raise errors.OpExecError("Can't activate the instance's disks")
7125

    
7126
      result = self.rpc.call_instance_start(target_node,
7127
                                            (instance, None, None), False)
7128
      msg = result.fail_msg
7129
      if msg:
7130
        _ShutdownInstanceDisks(self, instance)
7131
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7132
                                 (instance.name, target_node, msg))
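# Hedged example (hostnames invented, not used anywhere): the opcode that
# drives LUInstanceMove above, roughly as a client would submit it.  The LU
# then shuts the instance down, recreates and copies each disk on the target
# node via blockdev_assemble/blockdev_export, and restarts it there.
_EXAMPLE_MOVE_OPCODE = opcodes.OpInstanceMove(
  instance_name="inst1.example.com",
  target_node="node2.example.com")
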
class LUNodeMigrate(LogicalUnit):
7136
  """Migrate all instances from a node.
7137

7138
  """
7139
  HPATH = "node-migrate"
7140
  HTYPE = constants.HTYPE_NODE
7141
  REQ_BGL = False
7142

    
7143
  def CheckArguments(self):
7144
    pass
7145

    
7146
  def ExpandNames(self):
7147
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7148

    
7149
    self.share_locks = _ShareAll()
7150
    self.needed_locks = {
7151
      locking.LEVEL_NODE: [self.op.node_name],
7152
      }
7153

    
7154
  def BuildHooksEnv(self):
7155
    """Build hooks env.
7156

7157
    This runs on the master, the primary and all the secondaries.
7158

7159
    """
7160
    return {
7161
      "NODE_NAME": self.op.node_name,
7162
      }
7163

    
7164
  def BuildHooksNodes(self):
7165
    """Build hooks nodes.
7166

7167
    """
7168
    nl = [self.cfg.GetMasterNode()]
7169
    return (nl, nl)
7170

    
7171
  def CheckPrereq(self):
7172
    pass
7173

    
7174
  def Exec(self, feedback_fn):
7175
    # Prepare one migration job per primary instance of the node
    jobs = [
7177
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7178
                                 mode=self.op.mode,
7179
                                 live=self.op.live,
7180
                                 iallocator=self.op.iallocator,
7181
                                 target_node=self.op.target_node)]
7182
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7183
      ]
7184

    
7185
    # TODO: Run iallocator in this opcode and pass correct placement options to
7186
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7187
    # running the iallocator and the actual migration, a good consistency model
7188
    # will have to be found.
7189

    
7190
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7191
            frozenset([self.op.node_name]))
7192

    
7193
    return ResultWithJobs(jobs)
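# A hedged sketch of the job list built by LUNodeMigrate.Exec above (instance
# names invented, not used anywhere): one single-opcode job per primary
# instance, which the master daemon submits on our behalf via ResultWithJobs.
_EXAMPLE_NODE_MIGRATE_JOBS = [
  [opcodes.OpInstanceMigrate(instance_name=name, mode=None, live=None,
                             iallocator=None, target_node=None)]
  for name in ("inst1.example.com", "inst2.example.com")
  ]
_EXAMPLE_NODE_MIGRATE_RESULT = ResultWithJobs(_EXAMPLE_NODE_MIGRATE_JOBS)
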
class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we are cleaning up after a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether the operation results in a failover or a migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration is not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between the
                            source and the target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown

  """
7220

    
7221
  # Constants
7222
  _MIGRATION_POLL_INTERVAL = 1      # seconds
7223
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7224

    
7225
  def __init__(self, lu, instance_name, cleanup=False,
7226
               failover=False, fallback=False,
7227
               ignore_consistency=False,
7228
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7229
    """Initializes this class.
7230

7231
    """
7232
    Tasklet.__init__(self, lu)
7233

    
7234
    # Parameters
7235
    self.instance_name = instance_name
7236
    self.cleanup = cleanup
7237
    self.live = False # will be overridden later
7238
    self.failover = failover
7239
    self.fallback = fallback
7240
    self.ignore_consistency = ignore_consistency
7241
    self.shutdown_timeout = shutdown_timeout
7242

    
7243
  def CheckPrereq(self):
7244
    """Check prerequisites.
7245

7246
    This checks that the instance is in the cluster.
7247

7248
    """
7249
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7250
    instance = self.cfg.GetInstanceInfo(instance_name)
7251
    assert instance is not None
7252
    self.instance = instance
7253

    
7254
    if (not self.cleanup and not instance.admin_up and not self.failover and
7255
        self.fallback):
7256
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7257
                      " to failover")
7258
      self.failover = True
7259

    
7260
    if instance.disk_template not in constants.DTS_MIRRORED:
7261
      if self.failover:
7262
        text = "failovers"
7263
      else:
7264
        text = "migrations"
7265
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7266
                                 " %s" % (instance.disk_template, text),
7267
                                 errors.ECODE_STATE)
7268

    
7269
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7270
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7271

    
7272
      if self.lu.op.iallocator:
7273
        self._RunAllocator()
7274
      else:
7275
        # We set self.target_node as it is required by
        # BuildHooksEnv
7277
        self.target_node = self.lu.op.target_node
7278

    
7279
      # self.target_node is already populated, either directly or by the
7280
      # iallocator run
7281
      target_node = self.target_node
7282
      if self.target_node == instance.primary_node:
7283
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_STATE)
7286

    
7287
      if len(self.lu.tasklets) == 1:
7288
        # It is safe to release locks only when we're the only tasklet
7289
        # in the LU
7290
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7291
                      keep=[instance.primary_node, self.target_node])
7292

    
7293
    else:
7294
      secondary_nodes = instance.secondary_nodes
7295
      if not secondary_nodes:
7296
        raise errors.ConfigurationError("No secondary node but using"
7297
                                        " %s disk template" %
7298
                                        instance.disk_template)
7299
      target_node = secondary_nodes[0]
7300
      if self.lu.op.iallocator or (self.lu.op.target_node and
7301
                                   self.lu.op.target_node != target_node):
7302
        if self.failover:
7303
          text = "failed over"
7304
        else:
7305
          text = "migrated"
7306
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7307
                                   " be %s to arbitrary nodes"
7308
                                   " (neither an iallocator nor a target"
7309
                                   " node can be passed)" %
7310
                                   (instance.disk_template, text),
7311
                                   errors.ECODE_INVAL)
7312

    
7313
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7314

    
7315
    # check memory requirements on the target node
    if not self.failover or instance.admin_up:
7317
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7318
                           instance.name, i_be[constants.BE_MEMORY],
7319
                           instance.hypervisor)
7320
    else:
7321
      self.lu.LogInfo("Not checking memory on the target node as"
                      " instance will not be started")
7323

    
7324
    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7326

    
7327
    if not self.cleanup:
7328
      _CheckNodeNotDrained(self.lu, target_node)
7329
      if not self.failover:
7330
        result = self.rpc.call_instance_migratable(instance.primary_node,
7331
                                                   instance)
7332
        if result.fail_msg and self.fallback:
7333
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7334
                          " failover")
7335
          self.failover = True
7336
        else:
7337
          result.Raise("Can't migrate, please use failover",
7338
                       prereq=True, ecode=errors.ECODE_STATE)
7339

    
7340
    assert not (self.failover and self.cleanup)
7341

    
7342
    if not self.failover:
7343
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7344
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7345
                                   " parameters are accepted",
7346
                                   errors.ECODE_INVAL)
7347
      if self.lu.op.live is not None:
7348
        if self.lu.op.live:
7349
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7350
        else:
7351
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7352
        # reset the 'live' parameter to None so that repeated
7353
        # invocations of CheckPrereq do not raise an exception
7354
        self.lu.op.live = None
7355
      elif self.lu.op.mode is None:
7356
        # read the default value from the hypervisor
7357
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7358
                                                skip_globals=False)
7359
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7360

    
7361
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7362
    else:
7363
      # Failover is never live
7364
      self.live = False
7365

    
7366
  def _RunAllocator(self):
7367
    """Run the allocator based on input opcode.
7368

7369
    """
7370
    ial = IAllocator(self.cfg, self.rpc,
7371
                     mode=constants.IALLOCATOR_MODE_RELOC,
7372
                     name=self.instance_name,
7373
                     # TODO See why hail breaks with a single node below
7374
                     relocate_from=[self.instance.primary_node,
7375
                                    self.instance.primary_node],
7376
                     )
7377

    
7378
    ial.Run(self.lu.op.iallocator)
7379

    
7380
    if not ial.success:
7381
      raise errors.OpPrereqError("Can't compute nodes using"
7382
                                 " iallocator '%s': %s" %
7383
                                 (self.lu.op.iallocator, ial.info),
7384
                                 errors.ECODE_NORES)
7385
    if len(ial.result) != ial.required_nodes:
7386
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7387
                                 " of nodes (%s), required %s" %
7388
                                 (self.lu.op.iallocator, len(ial.result),
7389
                                  ial.required_nodes), errors.ECODE_FAULT)
7390
    self.target_node = ial.result[0]
7391
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7392
                 self.instance_name, self.lu.op.iallocator,
7393
                 utils.CommaJoin(ial.result))
7394

    
7395
  def _WaitUntilSync(self):
7396
    """Poll with custom rpc for disk sync.
7397

7398
    This uses our own step-based rpc call.
7399

7400
    """
7401
    self.feedback_fn("* wait until resync is done")
7402
    all_done = False
7403
    while not all_done:
7404
      all_done = True
7405
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7406
                                            self.nodes_ip,
7407
                                            self.instance.disks)
7408
      min_percent = 100
7409
      for node, nres in result.items():
7410
        nres.Raise("Cannot resync disks on node %s" % node)
7411
        node_done, node_percent = nres.payload
7412
        all_done = all_done and node_done
7413
        if node_percent is not None:
7414
          min_percent = min(min_percent, node_percent)
7415
      if not all_done:
7416
        if min_percent < 100:
7417
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7418
        time.sleep(2)
7419

    
7420
  def _EnsureSecondary(self, node):
7421
    """Demote a node to secondary.
7422

7423
    """
7424
    self.feedback_fn("* switching node %s to secondary mode" % node)
7425

    
7426
    for dev in self.instance.disks:
7427
      self.cfg.SetDiskID(dev, node)
7428

    
7429
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7430
                                          self.instance.disks)
7431
    result.Raise("Cannot change disk to secondary on node %s" % node)
7432

    
7433
  def _GoStandalone(self):
7434
    """Disconnect from the network.
7435

7436
    """
7437
    self.feedback_fn("* changing into standalone mode")
7438
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7439
                                               self.instance.disks)
7440
    for node, nres in result.items():
7441
      nres.Raise("Cannot disconnect disks node %s" % node)
7442

    
7443
  def _GoReconnect(self, multimaster):
7444
    """Reconnect to the network.
7445

7446
    """
7447
    if multimaster:
7448
      msg = "dual-master"
7449
    else:
7450
      msg = "single-master"
7451
    self.feedback_fn("* changing disks into %s mode" % msg)
7452
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7453
                                           self.instance.disks,
7454
                                           self.instance.name, multimaster)
7455
    for node, nres in result.items():
7456
      nres.Raise("Cannot change disks config on node %s" % node)
7457

    
7458
  def _ExecCleanup(self):
7459
    """Try to cleanup after a failed migration.
7460

7461
    The cleanup is done by:
7462
      - check that the instance is running only on one node
7463
        (and update the config if needed)
7464
      - change disks on its secondary node to secondary
7465
      - wait until disks are fully synchronized
7466
      - disconnect from the network
7467
      - change disks into single-master mode
7468
      - wait again until disks are fully synchronized
7469

7470
    """
7471
    instance = self.instance
7472
    target_node = self.target_node
7473
    source_node = self.source_node
7474

    
7475
    # check running on only one node
7476
    self.feedback_fn("* checking where the instance actually runs"
7477
                     " (if this hangs, the hypervisor might be in"
7478
                     " a bad state)")
7479
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7480
    for node, result in ins_l.items():
7481
      result.Raise("Can't contact node %s" % node)
7482

    
7483
    runningon_source = instance.name in ins_l[source_node].payload
7484
    runningon_target = instance.name in ins_l[target_node].payload
7485

    
7486
    if runningon_source and runningon_target:
7487
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7488
                               " or the hypervisor is confused; you will have"
7489
                               " to ensure manually that it runs only on one"
7490
                               " and restart this operation")
7491

    
7492
    if not (runningon_source or runningon_target):
7493
      raise errors.OpExecError("Instance does not seem to be running at all;"
7494
                               " in this case it's safer to repair by"
7495
                               " running 'gnt-instance stop' to ensure disk"
7496
                               " shutdown, and then restarting it")
7497

    
7498
    if runningon_target:
7499
      # the migration has actually succeeded, we need to update the config
7500
      self.feedback_fn("* instance running on secondary node (%s),"
7501
                       " updating config" % target_node)
7502
      instance.primary_node = target_node
7503
      self.cfg.Update(instance, self.feedback_fn)
7504
      demoted_node = source_node
7505
    else:
7506
      self.feedback_fn("* instance confirmed to be running on its"
7507
                       " primary node (%s)" % source_node)
7508
      demoted_node = target_node
7509

    
7510
    if instance.disk_template in constants.DTS_INT_MIRROR:
7511
      self._EnsureSecondary(demoted_node)
7512
      try:
7513
        self._WaitUntilSync()
7514
      except errors.OpExecError:
7515
        # we ignore here errors, since if the device is standalone, it
7516
        # won't be able to sync
7517
        pass
7518
      self._GoStandalone()
7519
      self._GoReconnect(False)
7520
      self._WaitUntilSync()
7521

    
7522
    self.feedback_fn("* done")
7523

    
7524
  def _RevertDiskStatus(self):
7525
    """Try to revert the disk status after a failed migration.
7526

7527
    """
7528
    target_node = self.target_node
7529
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7530
      return
7531

    
7532
    try:
7533
      self._EnsureSecondary(target_node)
7534
      self._GoStandalone()
7535
      self._GoReconnect(False)
7536
      self._WaitUntilSync()
7537
    except errors.OpExecError, err:
7538
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7539
                         " please try to recover the instance manually;"
7540
                         " error '%s'" % str(err))
7541

    
7542
  def _AbortMigration(self):
7543
    """Call the hypervisor code to abort a started migration.
7544

7545
    """
7546
    instance = self.instance
7547
    target_node = self.target_node
7548
    source_node = self.source_node
7549
    migration_info = self.migration_info
7550

    
7551
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7552
                                                                 instance,
7553
                                                                 migration_info,
7554
                                                                 False)
7555
    abort_msg = abort_result.fail_msg
7556
    if abort_msg:
7557
      logging.error("Aborting migration failed on target node %s: %s",
7558
                    target_node, abort_msg)
7559
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
7561

    
7562
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7563
        instance, False, self.live)
7564
    abort_msg = abort_result.fail_msg
7565
    if abort_msg:
7566
      logging.error("Aborting migration failed on source node %s: %s",
7567
                    source_node, abort_msg)
7568

    
7569
  def _ExecMigration(self):
7570
    """Migrate an instance.
7571

7572
    The migrate is done by:
7573
      - change the disks into dual-master mode
7574
      - wait until disks are fully synchronized again
7575
      - migrate the instance
7576
      - change disks on the new secondary node (the old primary) to secondary
7577
      - wait until disks are fully synchronized
7578
      - change disks into single-master mode
7579

7580
    """
7581
    instance = self.instance
7582
    target_node = self.target_node
7583
    source_node = self.source_node
7584

    
7585
    # Check for hypervisor version mismatch and warn the user.
7586
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7587
                                       None, self.instance.hypervisor)
7588
    src_info = nodeinfo[source_node]
7589
    dst_info = nodeinfo[target_node]
7590

    
7591
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7592
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7593
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7594
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7595
      if src_version != dst_version:
7596
        self.feedback_fn("* warning: hypervisor version mismatch between"
7597
                         " source (%s) and target (%s) node" %
7598
                         (src_version, dst_version))
7599

    
7600
    self.feedback_fn("* checking disk consistency between source and target")
7601
    for dev in instance.disks:
7602
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7603
        raise errors.OpExecError("Disk %s is degraded or not fully"
7604
                                 " synchronized on target node,"
7605
                                 " aborting migration" % dev.iv_name)
7606

    
7607
    # First get the migration information from the remote node
7608
    result = self.rpc.call_migration_info(source_node, instance)
7609
    msg = result.fail_msg
7610
    if msg:
7611
      log_err = ("Failed fetching source migration information from %s: %s" %
7612
                 (source_node, msg))
7613
      logging.error(log_err)
7614
      raise errors.OpExecError(log_err)
7615

    
7616
    self.migration_info = migration_info = result.payload
7617

    
7618
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7619
      # Then switch the disks to master/master mode
7620
      self._EnsureSecondary(target_node)
7621
      self._GoStandalone()
7622
      self._GoReconnect(True)
7623
      self._WaitUntilSync()
7624

    
7625
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7626
    result = self.rpc.call_accept_instance(target_node,
7627
                                           instance,
7628
                                           migration_info,
7629
                                           self.nodes_ip[target_node])
7630

    
7631
    msg = result.fail_msg
7632
    if msg:
7633
      logging.error("Instance pre-migration failed, trying to revert"
7634
                    " disk status: %s", msg)
7635
      self.feedback_fn("Pre-migration failed, aborting")
7636
      self._AbortMigration()
7637
      self._RevertDiskStatus()
7638
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7639
                               (instance.name, msg))
7640

    
7641
    self.feedback_fn("* migrating instance to %s" % target_node)
7642
    result = self.rpc.call_instance_migrate(source_node, instance,
7643
                                            self.nodes_ip[target_node],
7644
                                            self.live)
7645
    msg = result.fail_msg
7646
    if msg:
7647
      logging.error("Instance migration failed, trying to revert"
7648
                    " disk status: %s", msg)
7649
      self.feedback_fn("Migration failed, aborting")
7650
      self._AbortMigration()
7651
      self._RevertDiskStatus()
7652
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7653
                               (instance.name, msg))
7654

    
7655
    self.feedback_fn("* starting memory transfer")
7656
    last_feedback = time.time()
7657
    while True:
7658
      result = self.rpc.call_instance_get_migration_status(source_node,
7659
                                                           instance)
7660
      msg = result.fail_msg
7661
      ms = result.payload   # MigrationStatus instance
7662
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7663
        logging.error("Instance migration failed, trying to revert"
7664
                      " disk status: %s", msg)
7665
        self.feedback_fn("Migration failed, aborting")
7666
        self._AbortMigration()
7667
        self._RevertDiskStatus()
7668
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7669
                                 (instance.name, msg))
7670

    
7671
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7672
        self.feedback_fn("* memory transfer complete")
7673
        break
7674

    
7675
      if (utils.TimeoutExpired(last_feedback,
7676
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7677
          ms.transferred_ram is not None):
7678
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7679
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7680
        last_feedback = time.time()
7681

    
7682
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7683

    
7684
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7685
                                                           instance,
7686
                                                           True,
7687
                                                           self.live)
7688
    msg = result.fail_msg
7689
    if msg:
7690
      logging.error("Instance migration succeeded, but finalization failed"
7691
                    " on the source node: %s", msg)
7692
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7693
                               msg)
7694

    
7695
    instance.primary_node = target_node
7696

    
7697
    # distribute new instance config to the other nodes
7698
    self.cfg.Update(instance, self.feedback_fn)
7699

    
7700
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7701
                                                           instance,
7702
                                                           migration_info,
7703
                                                           True)
7704
    msg = result.fail_msg
7705
    if msg:
7706
      logging.error("Instance migration succeeded, but finalization failed"
7707
                    " on the target node: %s", msg)
7708
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7709
                               msg)
7710

    
7711
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7712
      self._EnsureSecondary(source_node)
7713
      self._WaitUntilSync()
7714
      self._GoStandalone()
7715
      self._GoReconnect(False)
7716
      self._WaitUntilSync()
7717

    
7718
    self.feedback_fn("* done")
7719

    
7720
  def _ExecFailover(self):
7721
    """Failover an instance.
7722

7723
    The failover is done by shutting it down on its present node and
7724
    starting it on the secondary.
7725

7726
    """
7727
    instance = self.instance
7728
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7729

    
7730
    source_node = instance.primary_node
7731
    target_node = self.target_node
7732

    
7733
    if instance.admin_up:
7734
      self.feedback_fn("* checking disk consistency between source and target")
7735
      for dev in instance.disks:
7736
        # for drbd, these are drbd over lvm
7737
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7738
          if primary_node.offline:
7739
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7740
                             " target node %s" %
7741
                             (primary_node.name, dev.iv_name, target_node))
7742
          elif not self.ignore_consistency:
7743
            raise errors.OpExecError("Disk %s is degraded on target node,"
7744
                                     " aborting failover" % dev.iv_name)
7745
    else:
7746
      self.feedback_fn("* not checking disk consistency as instance is not"
7747
                       " running")
7748

    
7749
    self.feedback_fn("* shutting down instance on source node")
7750
    logging.info("Shutting down instance %s on node %s",
7751
                 instance.name, source_node)
7752

    
7753
    result = self.rpc.call_instance_shutdown(source_node, instance,
7754
                                             self.shutdown_timeout)
7755
    msg = result.fail_msg
7756
    if msg:
7757
      if self.ignore_consistency or primary_node.offline:
7758
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7759
                           " proceeding anyway; please make sure node"
7760
                           " %s is down; error details: %s",
7761
                           instance.name, source_node, source_node, msg)
7762
      else:
7763
        raise errors.OpExecError("Could not shutdown instance %s on"
7764
                                 " node %s: %s" %
7765
                                 (instance.name, source_node, msg))
7766

    
7767
    self.feedback_fn("* deactivating the instance's disks on source node")
7768
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7769
      raise errors.OpExecError("Can't shut down the instance's disks")
7770

    
7771
    instance.primary_node = target_node
7772
    # distribute new instance config to the other nodes
7773
    self.cfg.Update(instance, self.feedback_fn)
7774

    
7775
    # Only start the instance if it's marked as up
7776
    if instance.admin_up:
7777
      self.feedback_fn("* activating the instance's disks on target node %s" %
7778
                       target_node)
7779
      logging.info("Starting instance %s on node %s",
7780
                   instance.name, target_node)
7781

    
7782
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7783
                                           ignore_secondaries=True)
7784
      if not disks_ok:
7785
        _ShutdownInstanceDisks(self.lu, instance)
7786
        raise errors.OpExecError("Can't activate the instance's disks")
7787

    
7788
      self.feedback_fn("* starting the instance on the target node %s" %
7789
                       target_node)
7790
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
7791
                                            False)
7792
      msg = result.fail_msg
7793
      if msg:
7794
        _ShutdownInstanceDisks(self.lu, instance)
7795
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7796
                                 (instance.name, target_node, msg))
7797

    
7798
  def Exec(self, feedback_fn):
7799
    """Perform the migration.
7800

7801
    """
7802
    self.feedback_fn = feedback_fn
7803
    self.source_node = self.instance.primary_node
7804

    
7805
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7806
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7807
      self.target_node = self.instance.secondary_nodes[0]
7808
      # Otherwise self.target_node has been populated either
7809
      # directly, or through an iallocator.
7810

    
7811
    self.all_nodes = [self.source_node, self.target_node]
7812
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7813
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7814

    
7815
    if self.failover:
7816
      feedback_fn("Failover instance %s" % self.instance.name)
7817
      self._ExecFailover()
7818
    else:
7819
      feedback_fn("Migrating instance %s" % self.instance.name)
7820

    
7821
      if self.cleanup:
7822
        return self._ExecCleanup()
7823
      else:
7824
        return self._ExecMigration()
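# A minimal standalone sketch (illustration only, not used anywhere) of the
# live/mode resolution rule implemented in TLMigrateInstance.CheckPrereq: an
# explicit boolean "live" wins and is mapped onto a migration mode, otherwise
# an explicit mode is used, otherwise the hypervisor's HV_MIGRATION_MODE
# default applies.
def _ExampleResolveMigrationMode(op_live, op_mode, hv_default):
  if op_live is not None and op_mode is not None:
    raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                               " parameters are accepted", errors.ECODE_INVAL)
  if op_live is not None:
    if op_live:
      return constants.HT_MIGRATION_LIVE
    return constants.HT_MIGRATION_NONLIVE
  if op_mode is not None:
    return op_mode
  return hv_default
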
def _CreateBlockDev(lu, node, instance, device, force_create,
7828
                    info, force_open):
7829
  """Create a tree of block devices on a given node.
7830

7831
  If this device type has to be created on secondaries, create it and
7832
  all its children.
7833

7834
  If not, just recurse to children keeping the same 'force' value.
7835

7836
  @param lu: the lu on whose behalf we execute
7837
  @param node: the node on which to create the device
7838
  @type instance: L{objects.Instance}
7839
  @param instance: the instance which owns the device
7840
  @type device: L{objects.Disk}
7841
  @param device: the device to create
7842
  @type force_create: boolean
7843
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7853

7854
  """
7855
  if device.CreateOnSecondary():
7856
    force_create = True
7857

    
7858
  if device.children:
7859
    for child in device.children:
7860
      _CreateBlockDev(lu, node, instance, child, force_create,
7861
                      info, force_open)
7862

    
7863
  if not force_create:
7864
    return
7865

    
7866
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7867

    
7868

    
7869
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7870
  """Create a single block device on a given node.
7871

7872
  This will not recurse over children of the device, so they must be
7873
  created in advance.
7874

7875
  @param lu: the lu on whose behalf we execute
7876
  @param node: the node on which to create the device
7877
  @type instance: L{objects.Instance}
7878
  @param instance: the instance which owns the device
7879
  @type device: L{objects.Disk}
7880
  @param device: the device to create
7881
  @param info: the extra 'metadata' we should attach to the device
7882
      (this will be represented as a LVM tag)
7883
  @type force_open: boolean
7884
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7888

7889
  """
7890
  lu.cfg.SetDiskID(device, node)
7891
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7892
                                       instance.name, force_open, info)
7893
  result.Raise("Can't create block device %s on"
7894
               " node %s for instance %s" % (device, node, instance.name))
7895
  if device.physical_id is None:
7896
    device.physical_id = result.payload
7897

    
7898

    
7899
def _GenerateUniqueNames(lu, exts):
7900
  """Generate a suitable LV name.
7901

7902
  This will generate a logical volume name for the given instance.
7903

7904
  """
7905
  results = []
7906
  for val in exts:
7907
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7908
    results.append("%s%s" % (new_id, val))
7909
  return results
7910

    
7911

    
7912
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7913
                         iv_name, p_minor, s_minor):
7914
  """Generate a drbd8 device complete with its children.
7915

7916
  """
7917
  assert len(vgnames) == len(names) == 2
7918
  port = lu.cfg.AllocatePort()
7919
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7920
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7921
                          logical_id=(vgnames[0], names[0]))
7922
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
7923
                          logical_id=(vgnames[1], names[1]))
7924
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7925
                          logical_id=(primary, secondary, port,
7926
                                      p_minor, s_minor,
7927
                                      shared_secret),
7928
                          children=[dev_data, dev_meta],
7929
                          iv_name=iv_name)
7930
  return drbd_dev
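# Illustrative sketch only (VG name, LV names, nodes, port, minors and secret
# below are all invented; the objects are not used anywhere): the tree built
# by _GenerateDRBD8Branch above is a DRBD8 device whose children are the data
# LV and the DRBD_META_SIZE metadata LV.
_EXAMPLE_DRBD_DATA = objects.Disk(dev_type=constants.LD_LV, size=10240,
                                  logical_id=("xenvg", "0a1b2c.disk0_data"))
_EXAMPLE_DRBD_META = objects.Disk(dev_type=constants.LD_LV,
                                  size=DRBD_META_SIZE,
                                  logical_id=("xenvg", "0a1b2c.disk0_meta"))
_EXAMPLE_DRBD_DEV = objects.Disk(dev_type=constants.LD_DRBD8, size=10240,
                                 logical_id=("node1.example.com",
                                             "node2.example.com", 11000,
                                             0, 1, "not-a-real-secret"),
                                 children=[_EXAMPLE_DRBD_DATA,
                                           _EXAMPLE_DRBD_META],
                                 iv_name="disk/0")
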
def _GenerateDiskTemplate(lu, template_name,
7934
                          instance_name, primary_node,
7935
                          secondary_nodes, disk_info,
7936
                          file_storage_dir, file_driver,
7937
                          base_index, feedback_fn):
7938
  """Generate the entire disk layout for a given template type.
7939

7940
  """
7941
  #TODO: compute space requirements
7942

    
7943
  vgname = lu.cfg.GetVGName()
7944
  disk_count = len(disk_info)
7945
  disks = []
7946
  if template_name == constants.DT_DISKLESS:
7947
    pass
7948
  elif template_name == constants.DT_PLAIN:
7949
    if len(secondary_nodes) != 0:
7950
      raise errors.ProgrammerError("Wrong template configuration")
7951

    
7952
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7953
                                      for i in range(disk_count)])
7954
    for idx, disk in enumerate(disk_info):
7955
      disk_index = idx + base_index
7956
      vg = disk.get(constants.IDISK_VG, vgname)
7957
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7958
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7959
                              size=disk[constants.IDISK_SIZE],
7960
                              logical_id=(vg, names[idx]),
7961
                              iv_name="disk/%d" % disk_index,
7962
                              mode=disk[constants.IDISK_MODE])
7963
      disks.append(disk_dev)
7964
  elif template_name == constants.DT_DRBD8:
7965
    if len(secondary_nodes) != 1:
7966
      raise errors.ProgrammerError("Wrong template configuration")
7967
    remote_node = secondary_nodes[0]
7968
    minors = lu.cfg.AllocateDRBDMinor(
7969
      [primary_node, remote_node] * len(disk_info), instance_name)
7970

    
7971
    names = []
7972
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7973
                                               for i in range(disk_count)]):
7974
      names.append(lv_prefix + "_data")
7975
      names.append(lv_prefix + "_meta")
7976
    for idx, disk in enumerate(disk_info):
7977
      disk_index = idx + base_index
7978
      data_vg = disk.get(constants.IDISK_VG, vgname)
7979
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7980
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7981
                                      disk[constants.IDISK_SIZE],
7982
                                      [data_vg, meta_vg],
7983
                                      names[idx * 2:idx * 2 + 2],
7984
                                      "disk/%d" % disk_index,
7985
                                      minors[idx * 2], minors[idx * 2 + 1])
7986
      disk_dev.mode = disk[constants.IDISK_MODE]
7987
      disks.append(disk_dev)
7988
  elif template_name == constants.DT_FILE:
7989
    if len(secondary_nodes) != 0:
7990
      raise errors.ProgrammerError("Wrong template configuration")
7991

    
7992
    opcodes.RequireFileStorage()
7993

    
7994
    for idx, disk in enumerate(disk_info):
7995
      disk_index = idx + base_index
7996
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7997
                              size=disk[constants.IDISK_SIZE],
7998
                              iv_name="disk/%d" % disk_index,
7999
                              logical_id=(file_driver,
8000
                                          "%s/disk%d" % (file_storage_dir,
8001
                                                         disk_index)),
8002
                              mode=disk[constants.IDISK_MODE])
8003
      disks.append(disk_dev)
8004
  elif template_name == constants.DT_SHARED_FILE:
8005
    if len(secondary_nodes) != 0:
8006
      raise errors.ProgrammerError("Wrong template configuration")
8007

    
8008
    opcodes.RequireSharedFileStorage()
8009

    
8010
    for idx, disk in enumerate(disk_info):
8011
      disk_index = idx + base_index
8012
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8013
                              size=disk[constants.IDISK_SIZE],
8014
                              iv_name="disk/%d" % disk_index,
8015
                              logical_id=(file_driver,
8016
                                          "%s/disk%d" % (file_storage_dir,
8017
                                                         disk_index)),
8018
                              mode=disk[constants.IDISK_MODE])
8019
      disks.append(disk_dev)
8020
  elif template_name == constants.DT_BLOCK:
8021
    if len(secondary_nodes) != 0:
8022
      raise errors.ProgrammerError("Wrong template configuration")
8023

    
8024
    for idx, disk in enumerate(disk_info):
8025
      disk_index = idx + base_index
8026
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8027
                              size=disk[constants.IDISK_SIZE],
8028
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8029
                                          disk[constants.IDISK_ADOPT]),
8030
                              iv_name="disk/%d" % disk_index,
8031
                              mode=disk[constants.IDISK_MODE])
8032
      disks.append(disk_dev)
8033

    
8034
  else:
8035
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8036
  return disks
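# Naming sketch (the UUID prefix below is invented): for the DRBD8 template,
# _GenerateDiskTemplate above derives one "_data"/"_meta" LV name pair per
# disk from the unique prefixes returned by _GenerateUniqueNames.
_EXAMPLE_LV_PREFIX = "d41d8cd9-8f00-3204-a980-0998ecf8427e.disk0"
_EXAMPLE_LV_NAMES = [_EXAMPLE_LV_PREFIX + "_data",
                     _EXAMPLE_LV_PREFIX + "_meta"]
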
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

8042
  """
8043
  return "originstname+%s" % instance.name
8044

    
8045

    
8046
def _CalcEta(time_taken, written, total_size):
8047
  """Calculates the ETA based on size written and total size.
8048

8049
  @param time_taken: The time taken so far
8050
  @param written: amount written so far
8051
  @param total_size: The total size of data to be written
8052
  @return: The remaining time in seconds
8053

8054
  """
8055
  avg_time = time_taken / float(written)
8056
  return (total_size - written) * avg_time
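# A quick sanity check of the ETA formula above (numbers invented): after 30
# seconds, 1024 of 4096 units are written, so the average cost is 30/1024
# seconds per unit and the remaining 3072 units need another 90 seconds.
assert _CalcEta(30.0, 1024, 4096) == 90.0
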
def _WipeDisks(lu, instance):
8060
  """Wipes instance disks.
8061

8062
  @type lu: L{LogicalUnit}
8063
  @param lu: the logical unit on whose behalf we execute
8064
  @type instance: L{objects.Instance}
8065
  @param instance: the instance whose disks we should create
8066
  @return: the success of the wipe
8067

8068
  """
8069
  node = instance.primary_node
8070

    
8071
  for device in instance.disks:
8072
    lu.cfg.SetDiskID(device, node)
8073

    
8074
  logging.info("Pause sync of instance %s disks", instance.name)
8075
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8076

    
8077
  for idx, success in enumerate(result.payload):
8078
    if not success:
8079
      logging.warn("pause-sync of instance %s for disks %d failed",
8080
                   instance.name, idx)
8081

    
8082
  try:
8083
    for idx, device in enumerate(instance.disks):
8084
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8085
      # MAX_WIPE_CHUNK at max
8086
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8087
                            constants.MIN_WIPE_CHUNK_PERCENT)
8088
      # we _must_ make this an int, otherwise rounding errors will
8089
      # occur
8090
      wipe_chunk_size = int(wipe_chunk_size)
8091

    
8092
      lu.LogInfo("* Wiping disk %d", idx)
8093
      logging.info("Wiping disk %d for instance %s, node %s using"
8094
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8095

    
8096
      offset = 0
8097
      size = device.size
8098
      last_output = 0
8099
      start_time = time.time()
8100

    
8101
      while offset < size:
8102
        wipe_size = min(wipe_chunk_size, size - offset)
8103
        logging.debug("Wiping disk %d, offset %s, chunk %s",
8104
                      idx, offset, wipe_size)
8105
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8106
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
8107
                     (idx, offset, wipe_size))
8108
        now = time.time()
8109
        offset += wipe_size
8110
        if now - last_output >= 60:
8111
          eta = _CalcEta(now - start_time, offset, size)
8112
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
8113
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
8114
          last_output = now
8115
  finally:
8116
    logging.info("Resume sync of instance %s disks", instance.name)
8117

    
8118
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8119

    
8120
    for idx, success in enumerate(result.payload):
8121
      if not success:
8122
        lu.LogWarning("Resume sync of disk %d failed, please have a"
8123
                      " look at the status and troubleshoot the issue", idx)
8124
        logging.warn("resume-sync of instance %s for disks %d failed",
8125
                     instance.name, idx)
8126

    
8127

    
8128
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8129
  """Create all disks for an instance.
8130

8131
  This abstracts away some work from AddInstance.
8132

8133
  @type lu: L{LogicalUnit}
8134
  @param lu: the logical unit on whose behalf we execute
8135
  @type instance: L{objects.Instance}
8136
  @param instance: the instance whose disks we should create
8137
  @type to_skip: list
8138
  @param to_skip: list of indices to skip
8139
  @type target_node: string
8140
  @param target_node: if passed, overrides the target node for creation
8141
  @rtype: boolean
8142
  @return: the success of the creation
8143

8144
  """
8145
  info = _GetInstanceInfoText(instance)
8146
  if target_node is None:
8147
    pnode = instance.primary_node
8148
    all_nodes = instance.all_nodes
8149
  else:
8150
    pnode = target_node
8151
    all_nodes = [pnode]
8152

    
8153
  if instance.disk_template in constants.DTS_FILEBASED:
8154
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8155
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8156

    
8157
    result.Raise("Failed to create directory '%s' on"
8158
                 " node %s" % (file_storage_dir, pnode))
8159

    
8160
  # Note: this needs to be kept in sync with adding of disks in
8161
  # LUInstanceSetParams
8162
  for idx, device in enumerate(instance.disks):
8163
    if to_skip and idx in to_skip:
8164
      continue
8165
    logging.info("Creating volume %s for instance %s",
8166
                 device.iv_name, instance.name)
8167
    #HARDCODE
8168
    for node in all_nodes:
8169
      f_create = node == pnode
8170
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8171

    
8172

    
8173
def _RemoveDisks(lu, instance, target_node=None):
8174
  """Remove all disks for an instance.
8175

8176
  This abstracts away some work from `AddInstance()` and
8177
  `RemoveInstance()`. Note that in case some of the devices couldn't
8178
  be removed, the removal will continue with the other ones (compare
8179
  with `_CreateDisks()`).
8180

8181
  @type lu: L{LogicalUnit}
8182
  @param lu: the logical unit on whose behalf we execute
8183
  @type instance: L{objects.Instance}
8184
  @param instance: the instance whose disks we should remove
8185
  @type target_node: string
8186
  @param target_node: used to override the node on which to remove the disks
8187
  @rtype: boolean
8188
  @return: the success of the removal
8189

8190
  """
8191
  logging.info("Removing block devices for instance %s", instance.name)
8192

    
8193
  all_result = True
8194
  for device in instance.disks:
8195
    if target_node:
8196
      edata = [(target_node, device)]
8197
    else:
8198
      edata = device.ComputeNodeTree(instance.primary_node)
8199
    for node, disk in edata:
8200
      lu.cfg.SetDiskID(disk, node)
8201
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8202
      if msg:
8203
        lu.LogWarning("Could not remove block device %s on node %s,"
8204
                      " continuing anyway: %s", device.iv_name, node, msg)
8205
        all_result = False
8206

    
8207
  if instance.disk_template == constants.DT_FILE:
8208
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8209
    if target_node:
8210
      tgt = target_node
8211
    else:
8212
      tgt = instance.primary_node
8213
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8214
    if result.fail_msg:
8215
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8216
                    file_storage_dir, instance.primary_node, result.fail_msg)
8217
      all_result = False
8218

    
8219
  return all_result
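# Sketch (VG, LV and node names invented; objects not used anywhere): for a
# non-mirrored LV disk, the node tree walked by _RemoveDisks above is simply
# the primary node paired with the disk itself.
_EXAMPLE_PLAIN_DISK = objects.Disk(dev_type=constants.LD_LV, size=1024,
                                   logical_id=("xenvg", "11111111.disk0"))
_EXAMPLE_NODE_TREE = _EXAMPLE_PLAIN_DISK.ComputeNodeTree("node1.example.com")
# _EXAMPLE_NODE_TREE == [("node1.example.com", _EXAMPLE_PLAIN_DISK)]
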
def _ComputeDiskSizePerVG(disk_template, disks):
8223
  """Compute disk size requirements in the volume group
8224

8225
  """
8226
  def _compute(disks, payload):
8227
    """Universal algorithm.
8228

8229
    """
8230
    vgs = {}
8231
    for disk in disks:
8232
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload
8234

    
8235
    return vgs
8236

    
8237
  # Required free disk space as a function of disk and swap space
8238
  req_size_dict = {
8239
    constants.DT_DISKLESS: {},
8240
    constants.DT_PLAIN: _compute(disks, 0),
8241
    # 128 MB are added for drbd metadata for each disk
8242
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8243
    constants.DT_FILE: {},
8244
    constants.DT_SHARED_FILE: {},
8245
  }
8246

    
8247
  if disk_template not in req_size_dict:
8248
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8249
                                 " is unknown" % disk_template)
8250

    
8251
  return req_size_dict[disk_template]
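# Hedged example of the per-VG computation above (VG names and sizes
# invented): two DRBD8 disks on different volume groups each account for
# their own size plus DRBD_META_SIZE of metadata.
_EXAMPLE_PER_VG = _ComputeDiskSizePerVG(
  constants.DT_DRBD8,
  [{constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 10240},
   {constants.IDISK_VG: "altvg", constants.IDISK_SIZE: 2048}])
# _EXAMPLE_PER_VG == {"xenvg": 10240 + DRBD_META_SIZE,
#                     "altvg": 2048 + DRBD_META_SIZE}
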
def _ComputeDiskSize(disk_template, disks):
8255
  """Compute disk size requirements in the volume group
8256

8257
  """
8258
  # Required free disk space as a function of disk and swap space
8259
  req_size_dict = {
8260
    constants.DT_DISKLESS: None,
8261
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8262
    # 128 MB are added for drbd metadata for each disk
8263
    constants.DT_DRBD8:
8264
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8265
    constants.DT_FILE: None,
8266
    constants.DT_SHARED_FILE: 0,
8267
    constants.DT_BLOCK: 0,
8268
  }
8269

    
8270
  if disk_template not in req_size_dict:
8271
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8272
                                 " is unknown" % disk_template)
8273

    
8274
  return req_size_dict[disk_template]
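# Hedged example for _ComputeDiskSize above (disk sizes invented): two DRBD8
# disks of 10 GiB and 20 GiB require their own size plus DRBD_META_SIZE
# (128 MiB) of metadata each.
assert _ComputeDiskSize(constants.DT_DRBD8,
                        [{constants.IDISK_SIZE: 10240},
                         {constants.IDISK_SIZE: 20480}]) == \
  10240 + DRBD_META_SIZE + 20480 + DRBD_META_SIZE
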
def _FilterVmNodes(lu, nodenames):
8278
  """Filters out non-vm_capable nodes from a list.
8279

8280
  @type lu: L{LogicalUnit}
8281
  @param lu: the logical unit for which we check
8282
  @type nodenames: list
8283
  @param nodenames: the list of nodes on which we should check
8284
  @rtype: list
8285
  @return: the list of vm-capable nodes
8286

8287
  """
8288
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]
8290

    
8291

    
8292
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8293
  """Hypervisor parameter validation.
8294

8295
  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.
8297

8298
  @type lu: L{LogicalUnit}
8299
  @param lu: the logical unit for which we check
8300
  @type nodenames: list
8301
  @param nodenames: the list of nodes on which we should check
8302
  @type hvname: string
8303
  @param hvname: the name of the hypervisor we should use
8304
  @type hvparams: dict
8305
  @param hvparams: the parameters which we need to check
8306
  @raise errors.OpPrereqError: if the parameters are not valid
8307

8308
  """
8309
  nodenames = _FilterVmNodes(lu, nodenames)
8310

    
8311
  cluster = lu.cfg.GetClusterInfo()
8312
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8313

    
8314
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8315
  for node in nodenames:
8316
    info = hvinfo[node]
8317
    if info.offline:
8318
      continue
8319
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
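
# Editor's cross-reference: a typical call site, used further below in
# LUInstanceCreate.CheckPrereq, is
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
# where the user-supplied hvparams are layered over the cluster defaults and
# validated on every vm_capable node involved, skipping offline ones.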


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
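
# Editor's cross-reference: the matching call in LUInstanceCreate.CheckPrereq
# below passes the fully-filled parameter dict:
#   _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)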


class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
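
  # Editor's illustration (not in the original source): disk adoption means
  # that every disk dict names an existing volume via constants.IDISK_ADOPT,
  # e.g. {constants.IDISK_SIZE: 1024, constants.IDISK_ADOPT: "existing-lv"};
  # mixing adopted and newly-created disks in a single request is rejected.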

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidental modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
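
    # Editor's note: for templates that need a mirror node the allocator is
    # expected to return two node names, which become the primary and the
    # secondary; single-node templates only consume ial.result[0].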

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
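
      # Editor's illustration (paths are examples only): with a cluster file
      # storage dir of "/srv/ganeti/file-storage", an optional per-instance
      # subdirectory "web" and instance "inst1.example.com", the result is
      # "/srv/ganeti/file-storage/web/inst1.example.com".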

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
    for param, value in self.op.beparams.iteritems():
      if value == constants.VALUE_AUTO:
        self.op.beparams[param] = default_beparams[param]
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      if link == constants.VALUE_AUTO:
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
          # to ReserveLV use the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Node locks differ from node resource locks"

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # Release all node resource locks
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    assert not self.owned_locks(locking.LEVEL_NODE_RES)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
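
  # Editor's illustration (not in the original source): a minimal create
  # request handled by this LU provides mode=INSTANCE_CREATE, an os_type,
  # a disk_template, per-disk dicts with at least constants.IDISK_SIZE,
  # optional nics, and either an explicit pnode (plus snode for mirrored
  # templates) or an iallocator to pick the nodes.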


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
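
# Editor's note (assumption, based on the console object defined in
# objects.py): the serialized console names the instance and the connection
# kind, plus kind-specific details such as the command to run; this is what
# the console clients ultimately consume.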


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]
  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
9548
  """Replaces disks for an instance.
9549

9550
  Note: Locking is not within the scope of this class.
9551

9552
  """
9553
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9554
               disks, delay_iallocator, early_release):
9555
    """Initializes this class.
9556

9557
    """
9558
    Tasklet.__init__(self, lu)
9559

    
9560
    # Parameters
9561
    self.instance_name = instance_name
9562
    self.mode = mode
9563
    self.iallocator_name = iallocator_name
9564
    self.remote_node = remote_node
9565
    self.disks = disks
9566
    self.delay_iallocator = delay_iallocator
9567
    self.early_release = early_release
9568

    
9569
    # Runtime data
9570
    self.instance = None
9571
    self.new_node = None
9572
    self.target_node = None
9573
    self.other_node = None
9574
    self.remote_node_info = None
9575
    self.node_secondary_ip = None
9576

    
9577
  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
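    # Accepted combinations:
    #   - mode == REPLACE_DISK_CHG with exactly one of remote_node/iallocator
    #   - any other mode with neither remote_node nor iallocator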
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=list(relocate_from))

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

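    # For a relocation request the iallocator returns exactly
    # ial.required_nodes names; the first (and only) entry is used as the
    # new secondary, e.g. ["node3.example.com"] (hostname illustrative).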
    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
9631
    """Wrapper for L{_FindFaultyInstanceDisks}.
9632

9633
    """
9634
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9635
                                    node_name, True)
9636

    
9637
  def _CheckDisksActivated(self, instance):
9638
    """Checks if the instance disks are activated.
9639

9640
    @param instance: The instance to check disks
9641
    @return: True if they are activated, False otherwise
9642

9643
    """
9644
    nodes = instance.all_nodes
9645

    
9646
    for idx, dev in enumerate(instance.disks):
9647
      for node in nodes:
9648
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9649
        self.cfg.SetDiskID(dev, node)
9650

    
9651
        result = self.rpc.call_blockdev_find(node, dev)
9652

    
9653
        if result.offline:
9654
          continue
9655
        elif result.fail_msg or not result.payload:
9656
          return False
9657

    
9658
    return True
9659

    
9660
  def CheckPrereq(self):
9661
    """Check prerequisites.
9662

9663
    This checks that the instance is in the cluster.
9664

9665
    """
9666
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9667
    assert instance is not None, \
9668
      "Cannot retrieve locked instance %s" % self.instance_name
9669

    
9670
    if instance.disk_template != constants.DT_DRBD8:
9671
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9672
                                 " instances", errors.ECODE_INVAL)
9673

    
9674
    if len(instance.secondary_nodes) != 1:
9675
      raise errors.OpPrereqError("The instance has a strange layout,"
9676
                                 " expected one secondary but found %d" %
9677
                                 len(instance.secondary_nodes),
9678
                                 errors.ECODE_FAULT)
9679

    
9680
    if not self.delay_iallocator:
9681
      self._CheckPrereq2()
9682

    
9683
  def _CheckPrereq2(self):
9684
    """Check prerequisites, second part.
9685

9686
    This function should always be part of CheckPrereq. It was separated and
    is now called from Exec because during node evacuation the iallocator was
    only called with an unmodified cluster model, not taking planned changes
    into account.
9690

9691
    """
9692
    instance = self.instance
9693
    secondary_node = instance.secondary_nodes[0]
9694

    
9695
    if self.iallocator_name is None:
9696
      remote_node = self.remote_node
9697
    else:
9698
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9699
                                       instance.name, instance.secondary_nodes)
9700

    
9701
    if remote_node is None:
9702
      self.remote_node_info = None
9703
    else:
9704
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9705
             "Remote node '%s' is not locked" % remote_node
9706

    
9707
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9708
      assert self.remote_node_info is not None, \
9709
        "Cannot retrieve locked node %s" % remote_node
9710

    
9711
    if remote_node == self.instance.primary_node:
9712
      raise errors.OpPrereqError("The specified node is the primary node of"
9713
                                 " the instance", errors.ECODE_INVAL)
9714

    
9715
    if remote_node == secondary_node:
9716
      raise errors.OpPrereqError("The specified node is already the"
9717
                                 " secondary node of the instance",
9718
                                 errors.ECODE_INVAL)
9719

    
9720
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9721
                                    constants.REPLACE_DISK_CHG):
9722
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9723
                                 errors.ECODE_INVAL)
9724

    
9725
    if self.mode == constants.REPLACE_DISK_AUTO:
9726
      if not self._CheckDisksActivated(instance):
9727
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9728
                                   " first" % self.instance_name,
9729
                                   errors.ECODE_STATE)
9730
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9731
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9732

    
9733
      if faulty_primary and faulty_secondary:
9734
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9735
                                   " one node and can not be repaired"
9736
                                   " automatically" % self.instance_name,
9737
                                   errors.ECODE_STATE)
9738

    
9739
      if faulty_primary:
9740
        self.disks = faulty_primary
9741
        self.target_node = instance.primary_node
9742
        self.other_node = secondary_node
9743
        check_nodes = [self.target_node, self.other_node]
9744
      elif faulty_secondary:
9745
        self.disks = faulty_secondary
9746
        self.target_node = secondary_node
9747
        self.other_node = instance.primary_node
9748
        check_nodes = [self.target_node, self.other_node]
9749
      else:
9750
        self.disks = []
9751
        check_nodes = []
9752

    
9753
    else:
9754
      # Non-automatic modes
9755
      if self.mode == constants.REPLACE_DISK_PRI:
9756
        self.target_node = instance.primary_node
9757
        self.other_node = secondary_node
9758
        check_nodes = [self.target_node, self.other_node]
9759

    
9760
      elif self.mode == constants.REPLACE_DISK_SEC:
9761
        self.target_node = secondary_node
9762
        self.other_node = instance.primary_node
9763
        check_nodes = [self.target_node, self.other_node]
9764

    
9765
      elif self.mode == constants.REPLACE_DISK_CHG:
9766
        self.new_node = remote_node
9767
        self.other_node = instance.primary_node
9768
        self.target_node = secondary_node
9769
        check_nodes = [self.new_node, self.other_node]
9770

    
9771
        _CheckNodeNotDrained(self.lu, remote_node)
9772
        _CheckNodeVmCapable(self.lu, remote_node)
9773

    
9774
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9775
        assert old_node_info is not None
9776
        if old_node_info.offline and not self.early_release:
9777
          # doesn't make sense to delay the release
9778
          self.early_release = True
9779
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9780
                          " early-release mode", secondary_node)
9781

    
9782
      else:
9783
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9784
                                     self.mode)
9785

    
9786
      # If not specified all disks should be replaced
9787
      if not self.disks:
9788
        self.disks = range(len(self.instance.disks))
9789

    
9790
    for node in check_nodes:
9791
      _CheckNodeOnline(self.lu, node)
9792

    
9793
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9794
                                                          self.other_node,
9795
                                                          self.target_node]
9796
                              if node_name is not None)
9797

    
9798
    # Release unneeded node locks
9799
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9800

    
9801
    # Release any owned node group
9802
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9803
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9804

    
9805
    # Check whether disks are valid
9806
    for disk_idx in self.disks:
9807
      instance.FindDisk(disk_idx)
9808

    
9809
    # Get secondary node IP addresses
9810
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9811
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9812

    
9813
  def Exec(self, feedback_fn):
9814
    """Execute disk replacement.
9815

9816
    This dispatches the disk replacement to the appropriate handler.
9817

9818
    """
9819
    if self.delay_iallocator:
9820
      self._CheckPrereq2()
9821

    
9822
    if __debug__:
9823
      # Verify owned locks before starting operation
9824
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9825
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9826
          ("Incorrect node locks, owning %s, expected %s" %
9827
           (owned_nodes, self.node_secondary_ip.keys()))
9828

    
9829
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9830
      assert list(owned_instances) == [self.instance_name], \
9831
          "Instance '%s' not locked" % self.instance_name
9832

    
9833
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9834
          "Should not own any node group lock at this point"
9835

    
9836
    if not self.disks:
9837
      feedback_fn("No disks need replacement")
9838
      return
9839

    
9840
    feedback_fn("Replacing disk(s) %s for %s" %
9841
                (utils.CommaJoin(self.disks), self.instance.name))
9842

    
9843
    activate_disks = (not self.instance.admin_up)
9844

    
9845
    # Activate the instance disks if we're replacing them on a down instance
9846
    if activate_disks:
9847
      _StartInstanceDisks(self.lu, self.instance, True)
9848

    
9849
    try:
9850
      # Should we replace the secondary node?
9851
      if self.new_node is not None:
9852
        fn = self._ExecDrbd8Secondary
9853
      else:
9854
        fn = self._ExecDrbd8DiskOnly
9855

    
9856
      result = fn(feedback_fn)
9857
    finally:
9858
      # Deactivate the instance disks if we're replacing them on a
9859
      # down instance
9860
      if activate_disks:
9861
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9862

    
9863
    if __debug__:
9864
      # Verify owned locks
9865
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9866
      nodes = frozenset(self.node_secondary_ip)
9867
      assert ((self.early_release and not owned_nodes) or
9868
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9869
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9870
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9871

    
9872
    return result
9873

    
9874
  def _CheckVolumeGroup(self, nodes):
9875
    self.lu.LogInfo("Checking volume groups")
9876

    
9877
    vgname = self.cfg.GetVGName()
9878

    
9879
    # Make sure volume group exists on all involved nodes
9880
    results = self.rpc.call_vg_list(nodes)
9881
    if not results:
9882
      raise errors.OpExecError("Can't list volume groups on the nodes")
9883

    
9884
    for node in nodes:
9885
      res = results[node]
9886
      res.Raise("Error checking node %s" % node)
9887
      if vgname not in res.payload:
9888
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9889
                                 (vgname, node))
9890

    
9891
  def _CheckDisksExistence(self, nodes):
9892
    # Check disk existence
9893
    for idx, dev in enumerate(self.instance.disks):
9894
      if idx not in self.disks:
9895
        continue
9896

    
9897
      for node in nodes:
9898
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9899
        self.cfg.SetDiskID(dev, node)
9900

    
9901
        result = self.rpc.call_blockdev_find(node, dev)
9902

    
9903
        msg = result.fail_msg
9904
        if msg or not result.payload:
9905
          if not msg:
9906
            msg = "disk not found"
9907
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9908
                                   (idx, node, msg))
9909

    
9910
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9911
    for idx, dev in enumerate(self.instance.disks):
9912
      if idx not in self.disks:
9913
        continue
9914

    
9915
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9916
                      (idx, node_name))
9917

    
9918
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9919
                                   ldisk=ldisk):
9920
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9921
                                 " replace disks for instance %s" %
9922
                                 (node_name, self.instance.name))
9923

    
9924
  def _CreateNewStorage(self, node_name):
9925
    """Create new storage on the primary or secondary node.
9926

9927
    This is only used for same-node replaces, not for changing the
9928
    secondary node, hence we don't want to modify the existing disk.
9929

9930
    """
9931
    iv_names = {}
9932

    
9933
    for idx, dev in enumerate(self.instance.disks):
9934
      if idx not in self.disks:
9935
        continue
9936

    
9937
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9938

    
9939
      self.cfg.SetDiskID(dev, node_name)
9940

    
9941
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9942
      names = _GenerateUniqueNames(self.lu, lv_names)
9943

    
9944
      vg_data = dev.children[0].logical_id[0]
9945
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9946
                             logical_id=(vg_data, names[0]))
9947
      vg_meta = dev.children[1].logical_id[0]
9948
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
9949
                             logical_id=(vg_meta, names[1]))
9950

    
9951
      new_lvs = [lv_data, lv_meta]
9952
      old_lvs = [child.Copy() for child in dev.children]
9953
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9954

    
9955
      # we pass force_create=True to force the LVM creation
9956
      for new_lv in new_lvs:
9957
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9958
                        _GetInstanceInfoText(self.instance), False)
9959

    
9960
    return iv_names
9961

    
9962
  def _CheckDevices(self, node_name, iv_names):
9963
    for name, (dev, _, _) in iv_names.iteritems():
9964
      self.cfg.SetDiskID(dev, node_name)
9965

    
9966
      result = self.rpc.call_blockdev_find(node_name, dev)
9967

    
9968
      msg = result.fail_msg
9969
      if msg or not result.payload:
9970
        if not msg:
9971
          msg = "disk not found"
9972
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9973
                                 (name, msg))
9974

    
9975
      if result.payload.is_degraded:
9976
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9977

    
9978
  def _RemoveOldStorage(self, node_name, iv_names):
9979
    for name, (_, old_lvs, _) in iv_names.iteritems():
9980
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9981

    
9982
      for lv in old_lvs:
9983
        self.cfg.SetDiskID(lv, node_name)
9984

    
9985
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9986
        if msg:
9987
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9988
                             hint="remove unused LVs manually")
9989

    
9990
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9991
    """Replace a disk on the primary or secondary for DRBD 8.
9992

9993
    The algorithm for replace is quite complicated:
9994

9995
      1. for each disk to be replaced:
9996

9997
        1. create new LVs on the target node with unique names
9998
        1. detach old LVs from the drbd device
9999
        1. rename old LVs to name_replaced.<time_t>
10000
        1. rename new LVs to old LVs
10001
        1. attach the new LVs (with the old names now) to the drbd device
10002

10003
      1. wait for sync across all devices
10004

10005
      1. for each modified disk:
10006

10007
        1. remove old LVs (which have the name name_replaces.<time_t>)
10008

10009
    Failures are not very well handled.
10010

10011
    """
10012
    steps_total = 6
10013

    
10014
    # Step: check device activation
10015
    self.lu.LogStep(1, steps_total, "Check device existence")
10016
    self._CheckDisksExistence([self.other_node, self.target_node])
10017
    self._CheckVolumeGroup([self.target_node, self.other_node])
10018

    
10019
    # Step: check other node consistency
10020
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10021
    self._CheckDisksConsistency(self.other_node,
10022
                                self.other_node == self.instance.primary_node,
10023
                                False)
10024

    
10025
    # Step: create new storage
10026
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10027
    iv_names = self._CreateNewStorage(self.target_node)
10028

    
10029
    # Step: for each lv, detach+rename*2+attach
10030
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10031
    for dev, old_lvs, new_lvs in iv_names.itervalues():
10032
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10033

    
10034
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10035
                                                     old_lvs)
10036
      result.Raise("Can't detach drbd from local storage on node"
10037
                   " %s for device %s" % (self.target_node, dev.iv_name))
10038
      #dev.children = []
10039
      #cfg.Update(instance)
10040

    
10041
      # ok, we created the new LVs, so now we know we have the needed
10042
      # storage; as such, we proceed on the target node to rename
10043
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10044
      # using the assumption that logical_id == physical_id (which in
10045
      # turn is the unique_id on that node)
10046

    
10047
      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

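      # Illustrative example (names are hypothetical): an existing data LV
      # "xenvg/0a1b2c.disk0_data" is first renamed to
      # "xenvg/0a1b2c.disk0_data_replaced-<time_t>", and the freshly created
      # LV is then renamed to the original name before being re-attached.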
      # Build the rename list based on what LVs exist on the node
10053
      rename_old_to_new = []
10054
      for to_ren in old_lvs:
10055
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10056
        if not result.fail_msg and result.payload:
10057
          # device exists
10058
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10059

    
10060
      self.lu.LogInfo("Renaming the old LVs on the target node")
10061
      result = self.rpc.call_blockdev_rename(self.target_node,
10062
                                             rename_old_to_new)
10063
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
10064

    
10065
      # Now we rename the new LVs to the old LVs
10066
      self.lu.LogInfo("Renaming the new LVs on the target node")
10067
      rename_new_to_old = [(new, old.physical_id)
10068
                           for old, new in zip(old_lvs, new_lvs)]
10069
      result = self.rpc.call_blockdev_rename(self.target_node,
10070
                                             rename_new_to_old)
10071
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
10072

    
10073
      # Intermediate steps of in memory modifications
10074
      for old, new in zip(old_lvs, new_lvs):
10075
        new.logical_id = old.logical_id
10076
        self.cfg.SetDiskID(new, self.target_node)
10077

    
10078
      # We need to modify old_lvs so that removal later removes the
10079
      # right LVs, not the newly added ones; note that old_lvs is a
10080
      # copy here
10081
      for disk in old_lvs:
10082
        disk.logical_id = ren_fn(disk, temp_suffix)
10083
        self.cfg.SetDiskID(disk, self.target_node)
10084

    
10085
      # Now that the new lvs have the old name, we can add them to the device
10086
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10087
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10088
                                                  new_lvs)
10089
      msg = result.fail_msg
10090
      if msg:
10091
        for new_lv in new_lvs:
10092
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
10093
                                               new_lv).fail_msg
10094
          if msg2:
10095
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10096
                               hint=("cleanup manually the unused logical"
10097
                                     "volumes"))
10098
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10099

    
10100
    cstep = 5
10101
    if self.early_release:
10102
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10103
      cstep += 1
10104
      self._RemoveOldStorage(self.target_node, iv_names)
10105
      # WARNING: we release both node locks here, do not do other RPCs
10106
      # than WaitForSync to the primary node
10107
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10108
                    names=[self.target_node, self.other_node])
10109

    
10110
    # Wait for sync
10111
    # This can fail as the old devices are degraded and _WaitForSync
10112
    # does a combined result over all disks, so we don't check its return value
10113
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10114
    cstep += 1
10115
    _WaitForSync(self.lu, self.instance)
10116

    
10117
    # Check all devices manually
10118
    self._CheckDevices(self.instance.primary_node, iv_names)
10119

    
10120
    # Step: remove old storage
10121
    if not self.early_release:
10122
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10123
      cstep += 1
10124
      self._RemoveOldStorage(self.target_node, iv_names)
10125

    
10126
  def _ExecDrbd8Secondary(self, feedback_fn):
10127
    """Replace the secondary node for DRBD 8.
10128

10129
    The algorithm for replace is quite complicated:
10130
      - for all disks of the instance:
10131
        - create new LVs on the new node with same names
10132
        - shutdown the drbd device on the old secondary
10133
        - disconnect the drbd network on the primary
10134
        - create the drbd device on the new secondary
10135
        - network attach the drbd on the primary, using an artifice:
10136
          the drbd code for Attach() will connect to the network if it
10137
          finds a device which is connected to the good local disks but
10138
          not network enabled
10139
      - wait for sync across all devices
10140
      - remove all disks from the old secondary
10141

10142
    Failures are not very well handled.
10143

10144
    """
10145
    steps_total = 6
10146

    
10147
    pnode = self.instance.primary_node
10148

    
10149
    # Step: check device activation
10150
    self.lu.LogStep(1, steps_total, "Check device existence")
10151
    self._CheckDisksExistence([self.instance.primary_node])
10152
    self._CheckVolumeGroup([self.instance.primary_node])
10153

    
10154
    # Step: check other node consistency
10155
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10156
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
10157

    
10158
    # Step: create new storage
10159
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10160
    for idx, dev in enumerate(self.instance.disks):
10161
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10162
                      (self.new_node, idx))
10163
      # we pass force_create=True to force LVM creation
10164
      for new_lv in dev.children:
10165
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10166
                        _GetInstanceInfoText(self.instance), False)
10167

    
10168
    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10172
    minors = self.cfg.AllocateDRBDMinor([self.new_node
10173
                                         for dev in self.instance.disks],
10174
                                        self.instance.name)
10175
    logging.debug("Allocated minors %r", minors)
10176

    
10177
    iv_names = {}
10178
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10179
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10180
                      (self.new_node, idx))
10181
      # create new devices on new_node; note that we create two IDs:
10182
      # one without port, so the drbd will be activated without
10183
      # networking information on the new node at this stage, and one
10184
      # with network, for the latter activation in step 4
10185
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10186
      if self.instance.primary_node == o_node1:
10187
        p_minor = o_minor1
10188
      else:
10189
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10190
        p_minor = o_minor2
10191

    
10192
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10193
                      p_minor, new_minor, o_secret)
10194
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10195
                    p_minor, new_minor, o_secret)
10196

    
10197
      iv_names[idx] = (dev, dev.children, new_net_id)
10198
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10199
                    new_net_id)
10200
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10201
                              logical_id=new_alone_id,
10202
                              children=dev.children,
10203
                              size=dev.size)
10204
      try:
10205
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10206
                              _GetInstanceInfoText(self.instance), False)
10207
      except errors.GenericError:
10208
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10209
        raise
10210

    
10211
    # We have new devices, shutdown the drbd on the old secondary
10212
    for idx, dev in enumerate(self.instance.disks):
10213
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10214
      self.cfg.SetDiskID(dev, self.target_node)
10215
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10216
      if msg:
10217
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10218
                           "node: %s" % (idx, msg),
10219
                           hint=("Please cleanup this device manually as"
10220
                                 " soon as possible"))
10221

    
10222
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10223
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10224
                                               self.instance.disks)[pnode]
10225

    
10226
    msg = result.fail_msg
10227
    if msg:
10228
      # detaches didn't succeed (unlikely)
10229
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10230
      raise errors.OpExecError("Can't detach the disks from the network on"
10231
                               " old node: %s" % (msg,))
10232

    
10233
    # if we managed to detach at least one, we update all the disks of
10234
    # the instance to point to the new secondary
10235
    self.lu.LogInfo("Updating instance configuration")
10236
    for dev, _, new_logical_id in iv_names.itervalues():
10237
      dev.logical_id = new_logical_id
10238
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10239

    
10240
    self.cfg.Update(self.instance, feedback_fn)
10241

    
10242
    # and now perform the drbd attach
10243
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10244
                    " (standalone => connected)")
10245
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10246
                                            self.new_node],
10247
                                           self.node_secondary_ip,
10248
                                           self.instance.disks,
10249
                                           self.instance.name,
10250
                                           False)
10251
    for to_node, to_result in result.items():
10252
      msg = to_result.fail_msg
10253
      if msg:
10254
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10255
                           to_node, msg,
10256
                           hint=("please do a gnt-instance info to see the"
10257
                                 " status of disks"))
10258
    cstep = 5
10259
    if self.early_release:
10260
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10261
      cstep += 1
10262
      self._RemoveOldStorage(self.target_node, iv_names)
10263
      # WARNING: we release all node locks here, do not do other RPCs
10264
      # than WaitForSync to the primary node
10265
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10266
                    names=[self.instance.primary_node,
10267
                           self.target_node,
10268
                           self.new_node])
10269

    
10270
    # Wait for sync
10271
    # This can fail as the old devices are degraded and _WaitForSync
10272
    # does a combined result over all disks, so we don't check its return value
10273
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10274
    cstep += 1
10275
    _WaitForSync(self.lu, self.instance)
10276

    
10277
    # Check all devices manually
10278
    self._CheckDevices(self.instance.primary_node, iv_names)
10279

    
10280
    # Step: remove old storage
10281
    if not self.early_release:
10282
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10283
      self._RemoveOldStorage(self.target_node, iv_names)
10284

    
10285

    
10286
class LURepairNodeStorage(NoHooksLU):
10287
  """Repairs the volume group on a node.
10288

10289
  """
10290
  REQ_BGL = False
10291

    
10292
  def CheckArguments(self):
10293
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10294

    
10295
    storage_type = self.op.storage_type
10296

    
10297
    if (constants.SO_FIX_CONSISTENCY not in
10298
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10299
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10300
                                 " repaired" % storage_type,
10301
                                 errors.ECODE_INVAL)
10302

    
10303
  def ExpandNames(self):
10304
    self.needed_locks = {
10305
      locking.LEVEL_NODE: [self.op.node_name],
10306
      }
10307

    
10308
  def _CheckFaultyDisks(self, instance, node_name):
10309
    """Ensure faulty disks abort the opcode or at least warn."""
10310
    try:
10311
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10312
                                  node_name, True):
10313
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10314
                                   " node '%s'" % (instance.name, node_name),
10315
                                   errors.ECODE_STATE)
10316
    except errors.OpPrereqError, err:
10317
      if self.op.ignore_consistency:
10318
        self.proc.LogWarning(str(err.args[0]))
10319
      else:
10320
        raise
10321

    
10322
  def CheckPrereq(self):
10323
    """Check prerequisites.
10324

10325
    """
10326
    # Check whether any instance on this node has faulty disks
10327
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10328
      if not inst.admin_up:
10329
        continue
10330
      check_nodes = set(inst.all_nodes)
10331
      check_nodes.discard(self.op.node_name)
10332
      for inst_node_name in check_nodes:
10333
        self._CheckFaultyDisks(inst, inst_node_name)
10334

    
10335
  def Exec(self, feedback_fn):
10336
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10337
                (self.op.name, self.op.node_name))
10338

    
10339
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10340
    result = self.rpc.call_storage_execute(self.op.node_name,
10341
                                           self.op.storage_type, st_args,
10342
                                           self.op.name,
10343
                                           constants.SO_FIX_CONSISTENCY)
10344
    result.Raise("Failed to repair storage unit '%s' on %s" %
10345
                 (self.op.name, self.op.node_name))
10346

    
10347

    
10348
class LUNodeEvacuate(NoHooksLU):
10349
  """Evacuates instances off a list of nodes.
10350

10351
  """
10352
  REQ_BGL = False
10353

    
10354
  def CheckArguments(self):
10355
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10356

    
10357
  def ExpandNames(self):
10358
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10359

    
10360
    if self.op.remote_node is not None:
10361
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10362
      assert self.op.remote_node
10363

    
10364
      if self.op.remote_node == self.op.node_name:
10365
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10366
                                   " secondary node", errors.ECODE_INVAL)
10367

    
10368
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10369
        raise errors.OpPrereqError("Without the use of an iallocator only"
10370
                                   " secondary instances can be evacuated",
10371
                                   errors.ECODE_INVAL)
10372

    
10373
    # Declare locks
10374
    self.share_locks = _ShareAll()
10375
    self.needed_locks = {
10376
      locking.LEVEL_INSTANCE: [],
10377
      locking.LEVEL_NODEGROUP: [],
10378
      locking.LEVEL_NODE: [],
10379
      }
10380

    
10381
    if self.op.remote_node is None:
10382
      # Iallocator will choose any node(s) in the same group
10383
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10384
    else:
10385
      group_nodes = frozenset([self.op.remote_node])
10386

    
10387
    # Determine nodes to be locked
10388
    self.lock_nodes = set([self.op.node_name]) | group_nodes
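    # lock_nodes thus always contains the evacuated node itself plus either
    # all members of its node group (iallocator case) or just the explicitly
    # requested remote node.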
10389

    
10390
  def _DetermineInstances(self):
10391
    """Builds list of instances to operate on.
10392

10393
    """
10394
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10395

    
10396
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10397
      # Primary instances only
10398
      inst_fn = _GetNodePrimaryInstances
10399
      assert self.op.remote_node is None, \
10400
        "Evacuating primary instances requires iallocator"
10401
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10402
      # Secondary instances only
10403
      inst_fn = _GetNodeSecondaryInstances
10404
    else:
10405
      # All instances
10406
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10407
      inst_fn = _GetNodeInstances
10408

    
10409
    return inst_fn(self.cfg, self.op.node_name)
10410

    
10411
  def DeclareLocks(self, level):
10412
    if level == locking.LEVEL_INSTANCE:
10413
      # Lock instances optimistically, needs verification once node and group
10414
      # locks have been acquired
10415
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10416
        set(i.name for i in self._DetermineInstances())
10417

    
10418
    elif level == locking.LEVEL_NODEGROUP:
10419
      # Lock node groups optimistically, needs verification once nodes have
10420
      # been acquired
10421
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10422
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10423

    
10424
    elif level == locking.LEVEL_NODE:
10425
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10426

    
10427
  def CheckPrereq(self):
10428
    # Verify locks
10429
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10430
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10431
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10432

    
10433
    assert owned_nodes == self.lock_nodes
10434

    
10435
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10436
    if owned_groups != wanted_groups:
10437
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10438
                               " current groups are '%s', used to be '%s'" %
10439
                               (utils.CommaJoin(wanted_groups),
10440
                                utils.CommaJoin(owned_groups)))
10441

    
10442
    # Determine affected instances
10443
    self.instances = self._DetermineInstances()
10444
    self.instance_names = [i.name for i in self.instances]
10445

    
10446
    if set(self.instance_names) != owned_instances:
10447
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10448
                               " were acquired, current instances are '%s',"
10449
                               " used to be '%s'" %
10450
                               (self.op.node_name,
10451
                                utils.CommaJoin(self.instance_names),
10452
                                utils.CommaJoin(owned_instances)))
10453

    
10454
    if self.instance_names:
10455
      self.LogInfo("Evacuating instances from node '%s': %s",
10456
                   self.op.node_name,
10457
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10458
    else:
10459
      self.LogInfo("No instances to evacuate from node '%s'",
10460
                   self.op.node_name)
10461

    
10462
    if self.op.remote_node is not None:
10463
      for i in self.instances:
10464
        if i.primary_node == self.op.remote_node:
10465
          raise errors.OpPrereqError("Node %s is the primary node of"
10466
                                     " instance %s, cannot use it as"
10467
                                     " secondary" %
10468
                                     (self.op.remote_node, i.name),
10469
                                     errors.ECODE_INVAL)
10470

    
10471
  def Exec(self, feedback_fn):
10472
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10473

    
10474
    if not self.instance_names:
10475
      # No instances to evacuate
10476
      jobs = []
10477

    
10478
    elif self.op.iallocator is not None:
10479
      # TODO: Implement relocation to other group
10480
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10481
                       evac_mode=self.op.mode,
10482
                       instances=list(self.instance_names))
10483

    
10484
      ial.Run(self.op.iallocator)
10485

    
10486
      if not ial.success:
10487
        raise errors.OpPrereqError("Can't compute node evacuation using"
10488
                                   " iallocator '%s': %s" %
10489
                                   (self.op.iallocator, ial.info),
10490
                                   errors.ECODE_NORES)
10491

    
10492
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10493

    
10494
    elif self.op.remote_node is not None:
10495
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10496
      jobs = [
10497
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10498
                                        remote_node=self.op.remote_node,
10499
                                        disks=[],
10500
                                        mode=constants.REPLACE_DISK_CHG,
10501
                                        early_release=self.op.early_release)]
10502
        for instance_name in self.instance_names
10503
        ]
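      # Each entry above is a single-opcode job: one REPLACE_DISK_CHG
      # replace-disks operation per evacuated instance, returned through
      # ResultWithJobs below.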
10504

    
10505
    else:
10506
      raise errors.ProgrammerError("No iallocator or remote node")
10507

    
10508
    return ResultWithJobs(jobs)
10509

    
10510

    
10511
def _SetOpEarlyRelease(early_release, op):
10512
  """Sets C{early_release} flag on opcodes if available.
10513

10514
  """
10515
  try:
10516
    op.early_release = early_release
10517
  except AttributeError:
10518
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10519

    
10520
  return op
10521

    
10522

    
10523
def _NodeEvacDest(use_nodes, group, nodes):
10524
  """Returns group or nodes depending on caller's choice.
10525

10526
  """
10527
  if use_nodes:
10528
    return utils.CommaJoin(nodes)
10529
  else:
10530
    return group
10531

    
10532

    
10533
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10534
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10535

10536
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10537
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10538

10539
  @type lu: L{LogicalUnit}
10540
  @param lu: Logical unit instance
10541
  @type alloc_result: tuple/list
10542
  @param alloc_result: Result from iallocator
10543
  @type early_release: bool
10544
  @param early_release: Whether to release locks early if possible
10545
  @type use_nodes: bool
10546
  @param use_nodes: Whether to display node names instead of groups
10547

10548
  """
  (moved, failed, jobs) = alloc_result
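  # Illustrative shape of alloc_result (names are hypothetical):
  #   moved  = [("inst1", "group1", ["node2"]), ...]
  #   failed = [("inst9", "insufficient resources"), ...]
  #   jobs   = [[op_dict, ...], ...]   # serialized opcodes, one list per job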
  if failed:
10552
    lu.LogWarning("Unable to evacuate instances %s",
10553
                  utils.CommaJoin("%s (%s)" % (name, reason)
10554
                                  for (name, reason) in failed))
10555

    
10556
  if moved:
10557
    lu.LogInfo("Instances to be moved: %s",
10558
               utils.CommaJoin("%s (to %s)" %
10559
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10560
                               for (name, group, nodes) in moved))
10561

    
10562
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10563
              map(opcodes.OpCode.LoadOpCode, ops))
10564
          for ops in jobs]
10565

    
10566

    
10567
class LUInstanceGrowDisk(LogicalUnit):
10568
  """Grow a disk of an instance.
10569

10570
  """
10571
  HPATH = "disk-grow"
10572
  HTYPE = constants.HTYPE_INSTANCE
10573
  REQ_BGL = False
10574

    
10575
  def ExpandNames(self):
10576
    self._ExpandAndLockInstance()
10577
    self.needed_locks[locking.LEVEL_NODE] = []
10578
    self.needed_locks[locking.LEVEL_NODE_RES] = []
10579
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
10580

    
10581
  def DeclareLocks(self, level):
10582
    if level == locking.LEVEL_NODE:
10583
      self._LockInstancesNodes()
10584
    elif level == locking.LEVEL_NODE_RES:
10585
      # Copy node locks
10586
      self.needed_locks[locking.LEVEL_NODE_RES] = \
10587
        self.needed_locks[locking.LEVEL_NODE][:]
10588

    
10589
  def BuildHooksEnv(self):
10590
    """Build hooks env.
10591

10592
    This runs on the master, the primary and all the secondaries.
10593

10594
    """
10595
    env = {
10596
      "DISK": self.op.disk,
10597
      "AMOUNT": self.op.amount,
10598
      }
10599
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10600
    return env
10601

    
10602
  def BuildHooksNodes(self):
10603
    """Build hooks nodes.
10604

10605
    """
10606
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10607
    return (nl, nl)
10608

    
10609
  def CheckPrereq(self):
10610
    """Check prerequisites.
10611

10612
    This checks that the instance is in the cluster.
10613

10614
    """
10615
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10616
    assert instance is not None, \
10617
      "Cannot retrieve locked instance %s" % self.op.instance_name
10618
    nodenames = list(instance.all_nodes)
10619
    for node in nodenames:
10620
      _CheckNodeOnline(self, node)
10621

    
10622
    self.instance = instance
10623

    
10624
    if instance.disk_template not in constants.DTS_GROWABLE:
10625
      raise errors.OpPrereqError("Instance's disk layout does not support"
10626
                                 " growing", errors.ECODE_INVAL)
10627

    
10628
    self.disk = instance.FindDisk(self.op.disk)
10629

    
10630
    if instance.disk_template not in (constants.DT_FILE,
10631
                                      constants.DT_SHARED_FILE):
10632
      # TODO: check the free disk space for file, when that feature will be
10633
      # supported
10634
      _CheckNodesFreeDiskPerVG(self, nodenames,
10635
                               self.disk.ComputeGrowth(self.op.amount))
10636

    
10637
  def Exec(self, feedback_fn):
10638
    """Execute disk grow.
10639

10640
    """
10641
    instance = self.instance
10642
    disk = self.disk
10643

    
10644
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10645
    assert (self.owned_locks(locking.LEVEL_NODE) ==
10646
            self.owned_locks(locking.LEVEL_NODE_RES))
10647

    
10648
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10649
    if not disks_ok:
10650
      raise errors.OpExecError("Cannot activate block device to grow")
10651

    
10652
    feedback_fn("Growing disk %s of instance '%s' by %s" %
10653
                (self.op.disk, instance.name,
10654
                 utils.FormatUnit(self.op.amount, "h")))
10655

    
10656
    # First run all grow ops in dry-run mode
10657
    for node in instance.all_nodes:
10658
      self.cfg.SetDiskID(disk, node)
10659
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10660
      result.Raise("Grow request failed to node %s" % node)
10661

    
10662
    # We know that (as far as we can test) operations across different
10663
    # nodes will succeed, time to run it for real
10664
    for node in instance.all_nodes:
10665
      self.cfg.SetDiskID(disk, node)
10666
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10667
      result.Raise("Grow request failed to node %s" % node)
10668

    
10669
      # TODO: Rewrite code to work properly
10670
      # DRBD goes into sync mode for a short amount of time after executing the
10671
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10672
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10673
      # time is a work-around.
10674
      time.sleep(5)
10675

    
10676
    disk.RecordGrow(self.op.amount)
10677
    self.cfg.Update(instance, feedback_fn)
10678

    
10679
    # Changes have been recorded, release node lock
10680
    _ReleaseLocks(self, locking.LEVEL_NODE)
10681

    
10682
    # Downgrade lock while waiting for sync
10683
    self.glm.downgrade(locking.LEVEL_INSTANCE)
10684

    
10685
    if self.op.wait_for_sync:
10686
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10687
      if disk_abort:
10688
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10689
                             " status; please check the instance")
10690
      if not instance.admin_up:
10691
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10692
    elif not instance.admin_up:
10693
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10694
                           " not supposed to be running because no wait for"
10695
                           " sync mode was requested")
10696

    
10697
    assert self.owned_locks(locking.LEVEL_NODE_RES)
10698
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10699

    
10700

    
10701
class LUInstanceQueryData(NoHooksLU):
10702
  """Query runtime instance data.
10703

10704
  """
10705
  REQ_BGL = False
10706

    
10707
  def ExpandNames(self):
10708
    self.needed_locks = {}
10709

    
10710
    # Use locking if requested or when non-static information is wanted
10711
    if not (self.op.static or self.op.use_locking):
10712
      self.LogWarning("Non-static data requested, locks need to be acquired")
10713
      self.op.use_locking = True
10714

    
10715
    if self.op.instances or not self.op.use_locking:
10716
      # Expand instance names right here
10717
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10718
    else:
10719
      # Will use acquired locks
10720
      self.wanted_names = None
10721

    
10722
    if self.op.use_locking:
10723
      self.share_locks = _ShareAll()
10724

    
10725
      if self.wanted_names is None:
10726
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10727
      else:
10728
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10729

    
10730
      self.needed_locks[locking.LEVEL_NODE] = []
10731
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10732

    
10733
  def DeclareLocks(self, level):
10734
    if self.op.use_locking and level == locking.LEVEL_NODE:
10735
      self._LockInstancesNodes()
10736

    
10737
  def CheckPrereq(self):
10738
    """Check prerequisites.
10739

10740
    This only checks the optional instance list against the existing names.
10741

10742
    """
10743
    if self.wanted_names is None:
10744
      assert self.op.use_locking, "Locking was not used"
10745
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10746

    
10747
    self.wanted_instances = \
10748
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10749

    
10750
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10751
    """Returns the status of a block device
10752

10753
    """
10754
    if self.op.static or not node:
10755
      return None
10756

    
10757
    self.cfg.SetDiskID(dev, node)
10758

    
10759
    result = self.rpc.call_blockdev_find(node, dev)
10760
    if result.offline:
10761
      return None
10762

    
10763
    result.Raise("Can't compute disk status for %s" % instance_name)
10764

    
10765
    status = result.payload
10766
    if status is None:
10767
      return None
10768

    
10769
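    # The tuple below exposes the fields of the blockdev_find payload:
    # device path, major/minor numbers, sync percentage, estimated sync
    # time, the degraded flag and the ldisk status.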
    return (status.dev_path, status.major, status.minor,
10770
            status.sync_percent, status.estimated_time,
10771
            status.is_degraded, status.ldisk_status)
10772

    
10773
  def _ComputeDiskStatus(self, instance, snode, dev):
10774
    """Compute block device status.
10775

10776
    """
10777
    if dev.dev_type in constants.LDS_DRBD:
10778
      # we change the snode then (otherwise we use the one passed in)
10779
      if dev.logical_id[0] == instance.primary_node:
10780
        snode = dev.logical_id[1]
10781
      else:
10782
        snode = dev.logical_id[0]
10783

    
10784
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10785
                                              instance.name, dev)
10786
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10787

    
10788
    if dev.children:
10789
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10790
                                        instance, snode),
10791
                         dev.children)
10792
    else:
10793
      dev_children = []
10794

    
10795
    return {
10796
      "iv_name": dev.iv_name,
10797
      "dev_type": dev.dev_type,
10798
      "logical_id": dev.logical_id,
10799
      "physical_id": dev.physical_id,
10800
      "pstatus": dev_pstatus,
10801
      "sstatus": dev_sstatus,
10802
      "children": dev_children,
10803
      "mode": dev.mode,
10804
      "size": dev.size,
10805
      }
10806

    
10807
  def Exec(self, feedback_fn):
10808
    """Gather and return data"""
10809
    result = {}
10810

    
10811
    cluster = self.cfg.GetClusterInfo()
10812

    
10813
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10814
                                          for i in self.wanted_instances)
10815
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10816
      if self.op.static or pnode.offline:
10817
        remote_state = None
10818
        if pnode.offline:
10819
          self.LogWarning("Primary node %s is marked offline, returning static"
10820
                          " information only for instance %s" %
10821
                          (pnode.name, instance.name))
10822
      else:
10823
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10824
                                                  instance.name,
10825
                                                  instance.hypervisor)
10826
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10827
        remote_info = remote_info.payload
10828
        if remote_info and "state" in remote_info:
10829
          remote_state = "up"
10830
        else:
10831
          remote_state = "down"
10832

    
10833
      if instance.admin_up:
10834
        config_state = "up"
10835
      else:
10836
        config_state = "down"
10837

    
10838
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10839
                  instance.disks)
10840

    
10841
      result[instance.name] = {
10842
        "name": instance.name,
10843
        "config_state": config_state,
10844
        "run_state": remote_state,
10845
        "pnode": instance.primary_node,
10846
        "snodes": instance.secondary_nodes,
10847
        "os": instance.os,
10848
        # this happens to be the same format used for hooks
10849
        "nics": _NICListToTuple(self, instance.nics),
10850
        "disk_template": instance.disk_template,
10851
        "disks": disks,
10852
        "hypervisor": instance.hypervisor,
10853
        "network_port": instance.network_port,
10854
        "hv_instance": instance.hvparams,
10855
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10856
        "be_instance": instance.beparams,
10857
        "be_actual": cluster.FillBE(instance),
10858
        "os_instance": instance.osparams,
10859
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10860
        "serial_no": instance.serial_no,
10861
        "mtime": instance.mtime,
10862
        "ctime": instance.ctime,
10863
        "uuid": instance.uuid,
10864
        }
10865

    
10866
    return result
10867

    
10868

    
10869
class LUInstanceSetParams(LogicalUnit):
10870
  """Modifies an instances's parameters.
10871

10872
  """
10873
  HPATH = "instance-modify"
10874
  HTYPE = constants.HTYPE_INSTANCE
10875
  REQ_BGL = False
10876

    
10877
  def CheckArguments(self):
10878
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10879
            self.op.hvparams or self.op.beparams or self.op.os_name):
10880
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10881

    
10882
    if self.op.hvparams:
10883
      _CheckGlobalHvParams(self.op.hvparams)
10884

    
10885
    # Disk validation
10886
    disk_addremove = 0
10887
    for disk_op, disk_dict in self.op.disks:
10888
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10889
      if disk_op == constants.DDM_REMOVE:
10890
        disk_addremove += 1
10891
        continue
10892
      elif disk_op == constants.DDM_ADD:
10893
        disk_addremove += 1
10894
      else:
10895
        if not isinstance(disk_op, int):
10896
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10897
        if not isinstance(disk_dict, dict):
10898
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10899
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10900

    
10901
      if disk_op == constants.DDM_ADD:
10902
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10903
        if mode not in constants.DISK_ACCESS_SET:
10904
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10905
                                     errors.ECODE_INVAL)
10906
        size = disk_dict.get(constants.IDISK_SIZE, None)
10907
        if size is None:
10908
          raise errors.OpPrereqError("Required disk parameter size missing",
10909
                                     errors.ECODE_INVAL)
10910
        try:
10911
          size = int(size)
10912
        except (TypeError, ValueError), err:
10913
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10914
                                     str(err), errors.ECODE_INVAL)
10915
        disk_dict[constants.IDISK_SIZE] = size
10916
      else:
10917
        # modification of disk
10918
        if constants.IDISK_SIZE in disk_dict:
10919
          raise errors.OpPrereqError("Disk size change not possible, use"
10920
                                     " grow-disk", errors.ECODE_INVAL)
10921

    
10922
    if disk_addremove > 1:
10923
      raise errors.OpPrereqError("Only one disk add or remove operation"
10924
                                 " supported at a time", errors.ECODE_INVAL)
10925

    
10926
    if self.op.disks and self.op.disk_template is not None:
10927
      raise errors.OpPrereqError("Disk template conversion and other disk"
10928
                                 " changes not supported at the same time",
10929
                                 errors.ECODE_INVAL)
10930

    
10931
    if (self.op.disk_template and
10932
        self.op.disk_template in constants.DTS_INT_MIRROR and
10933
        self.op.remote_node is None):
10934
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10935
                                 " one requires specifying a secondary node",
10936
                                 errors.ECODE_INVAL)
10937

    
10938
    # NIC validation
10939
    nic_addremove = 0
10940
    for nic_op, nic_dict in self.op.nics:
10941
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10942
      if nic_op == constants.DDM_REMOVE:
10943
        nic_addremove += 1
10944
        continue
10945
      elif nic_op == constants.DDM_ADD:
10946
        nic_addremove += 1
10947
      else:
10948
        if not isinstance(nic_op, int):
10949
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10950
        if not isinstance(nic_dict, dict):
10951
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10952
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10953

    
10954
      # nic_dict should be a dict
10955
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10956
      if nic_ip is not None:
10957
        if nic_ip.lower() == constants.VALUE_NONE:
10958
          nic_dict[constants.INIC_IP] = None
10959
        else:
10960
          if not netutils.IPAddress.IsValid(nic_ip):
10961
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10962
                                       errors.ECODE_INVAL)
10963

    
10964
      nic_bridge = nic_dict.get("bridge", None)
10965
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10966
      if nic_bridge and nic_link:
10967
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10968
                                   " at the same time", errors.ECODE_INVAL)
10969
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10970
        nic_dict["bridge"] = None
10971
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10972
        nic_dict[constants.INIC_LINK] = None
10973

    
10974
      if nic_op == constants.DDM_ADD:
10975
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10976
        if nic_mac is None:
10977
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10978

    
10979
      if constants.INIC_MAC in nic_dict:
10980
        nic_mac = nic_dict[constants.INIC_MAC]
10981
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10982
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10983

    
10984
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10985
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10986
                                     " modifying an existing nic",
10987
                                     errors.ECODE_INVAL)
10988

    
10989
    if nic_addremove > 1:
10990
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10991
                                 " supported at a time", errors.ECODE_INVAL)
10992

    
10993
  def ExpandNames(self):
10994
    self._ExpandAndLockInstance()
10995
    self.needed_locks[locking.LEVEL_NODE] = []
10996
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10997

    
10998
  def DeclareLocks(self, level):
10999
    if level == locking.LEVEL_NODE:
11000
      self._LockInstancesNodes()
11001
      if self.op.disk_template and self.op.remote_node:
11002
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11003
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11004

    
11005
  def BuildHooksEnv(self):
11006
    """Build hooks env.
11007

11008
    This runs on the master, primary and secondaries.
11009

11010
    """
11011
    args = dict()
11012
    if constants.BE_MEMORY in self.be_new:
11013
      args["memory"] = self.be_new[constants.BE_MEMORY]
11014
    if constants.BE_VCPUS in self.be_new:
11015
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
11016
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11017
    # information at all.
11018
    if self.op.nics:
11019
      args["nics"] = []
11020
      nic_override = dict(self.op.nics)
11021
      for idx, nic in enumerate(self.instance.nics):
11022
        if idx in nic_override:
11023
          this_nic_override = nic_override[idx]
11024
        else:
11025
          this_nic_override = {}
11026
        if constants.INIC_IP in this_nic_override:
11027
          ip = this_nic_override[constants.INIC_IP]
11028
        else:
11029
          ip = nic.ip
11030
        if constants.INIC_MAC in this_nic_override:
11031
          mac = this_nic_override[constants.INIC_MAC]
11032
        else:
11033
          mac = nic.mac
11034
        if idx in self.nic_pnew:
11035
          nicparams = self.nic_pnew[idx]
11036
        else:
11037
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11038
        mode = nicparams[constants.NIC_MODE]
11039
        link = nicparams[constants.NIC_LINK]
11040
        args["nics"].append((ip, mac, mode, link))
11041
      if constants.DDM_ADD in nic_override:
11042
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11043
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11044
        nicparams = self.nic_pnew[constants.DDM_ADD]
11045
        mode = nicparams[constants.NIC_MODE]
11046
        link = nicparams[constants.NIC_LINK]
11047
        args["nics"].append((ip, mac, mode, link))
11048
      elif constants.DDM_REMOVE in nic_override:
11049
        del args["nics"][-1]
11050

    
11051
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11052
    if self.op.disk_template:
11053
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11054

    
11055
    return env
11056

    
11057
  def BuildHooksNodes(self):
11058
    """Build hooks nodes.
11059

11060
    """
11061
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11062
    return (nl, nl)
11063

    
11064
  def CheckPrereq(self):
11065
    """Check prerequisites.
11066

11067
    This only checks the instance list against the existing names.
11068

11069
    """
11070
    # checking the new params on the primary/secondary nodes
11071

    
11072
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11073
    cluster = self.cluster = self.cfg.GetClusterInfo()
11074
    assert self.instance is not None, \
11075
      "Cannot retrieve locked instance %s" % self.op.instance_name
11076
    pnode = instance.primary_node
11077
    nodelist = list(instance.all_nodes)
11078

    
11079
    # OS change
11080
    if self.op.os_name and not self.op.force:
11081
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11082
                      self.op.force_variant)
11083
      instance_os = self.op.os_name
11084
    else:
11085
      instance_os = instance.os
11086

    
11087
    if self.op.disk_template:
11088
      if instance.disk_template == self.op.disk_template:
11089
        raise errors.OpPrereqError("Instance already has disk template %s" %
11090
                                   instance.disk_template, errors.ECODE_INVAL)
11091

    
11092
      if (instance.disk_template,
11093
          self.op.disk_template) not in self._DISK_CONVERSIONS:
11094
        raise errors.OpPrereqError("Unsupported disk template conversion from"
11095
                                   " %s to %s" % (instance.disk_template,
11096
                                                  self.op.disk_template),
11097
                                   errors.ECODE_INVAL)
11098
      _CheckInstanceDown(self, instance, "cannot change disk template")
11099
      if self.op.disk_template in constants.DTS_INT_MIRROR:
11100
        if self.op.remote_node == pnode:
11101
          raise errors.OpPrereqError("Given new secondary node %s is the same"
11102
                                     " as the primary node of the instance" %
11103
                                     self.op.remote_node, errors.ECODE_STATE)
11104
        _CheckNodeOnline(self, self.op.remote_node)
11105
        _CheckNodeNotDrained(self, self.op.remote_node)
11106
        # FIXME: here we assume that the old instance type is DT_PLAIN
11107
        assert instance.disk_template == constants.DT_PLAIN
11108
        disks = [{constants.IDISK_SIZE: d.size,
11109
                  constants.IDISK_VG: d.logical_id[0]}
11110
                 for d in instance.disks]
11111
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11112
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11113

    
11114
    # hvparams processing
11115
    if self.op.hvparams:
11116
      hv_type = instance.hypervisor
11117
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11118
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11119
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11120

    
11121
      # local check
11122
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11123
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11124
      self.hv_proposed = self.hv_new = hv_new # the new actual values
11125
      self.hv_inst = i_hvdict # the new dict (without defaults)
11126
    else:
11127
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11128
                                              instance.hvparams)
11129
      self.hv_new = self.hv_inst = {}
11130

    
11131
    # beparams processing
11132
    if self.op.beparams:
11133
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11134
                                   use_none=True)
11135
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11136
      be_new = cluster.SimpleFillBE(i_bedict)
11137
      self.be_proposed = self.be_new = be_new # the new actual values
11138
      self.be_inst = i_bedict # the new dict (without defaults)
11139
    else:
11140
      self.be_new = self.be_inst = {}
11141
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11142
    be_old = cluster.FillBE(instance)
11143

    
11144
    # CPU param validation -- checking every time a parameter is
11145
    # changed to cover all cases where either CPU mask or vcpus have
11146
    # changed
11147
    if (constants.BE_VCPUS in self.be_proposed and
11148
        constants.HV_CPU_MASK in self.hv_proposed):
11149
      cpu_list = \
11150
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11151
      # Verify mask is consistent with number of vCPUs. Can skip this
11152
      # test if only 1 entry in the CPU mask, which means same mask
11153
      # is applied to all vCPUs.
11154
      if (len(cpu_list) > 1 and
11155
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11156
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11157
                                   " CPU mask [%s]" %
11158
                                   (self.be_proposed[constants.BE_VCPUS],
11159
                                    self.hv_proposed[constants.HV_CPU_MASK]),
11160
                                   errors.ECODE_INVAL)
11161

    
11162
      # Only perform this test if a new CPU mask is given
11163
      if constants.HV_CPU_MASK in self.hv_new:
11164
        # Calculate the largest CPU number requested
11165
        max_requested_cpu = max(map(max, cpu_list))
11166
        # Check that all of the instance's nodes have enough physical CPUs to
11167
        # satisfy the requested CPU mask
11168
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11169
                                max_requested_cpu + 1, instance.hypervisor)
11170

    
11171
    # osparams processing
11172
    if self.op.osparams:
11173
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11174
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11175
      self.os_inst = i_osdict # the new dict (without defaults)
11176
    else:
11177
      self.os_inst = {}
11178

    
11179
    self.warn = []
11180

    
11181
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11182
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11183
      mem_check_list = [pnode]
11184
      if be_new[constants.BE_AUTO_BALANCE]:
11185
        # either we changed auto_balance to yes or it was already set before
11186
        mem_check_list.extend(instance.secondary_nodes)
11187
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11188
                                                  instance.hypervisor)
11189
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11190
                                         instance.hypervisor)
11191
      pninfo = nodeinfo[pnode]
11192
      msg = pninfo.fail_msg
11193
      if msg:
11194
        # Assume the primary node is unreachable and go ahead
11195
        self.warn.append("Can't get info from primary node %s: %s" %
11196
                         (pnode, msg))
11197
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
11198
        self.warn.append("Node data from primary node %s doesn't contain"
11199
                         " free memory information" % pnode)
11200
      elif instance_info.fail_msg:
11201
        self.warn.append("Can't get instance runtime information: %s" %
11202
                        instance_info.fail_msg)
11203
      else:
11204
        if instance_info.payload:
11205
          current_mem = int(instance_info.payload["memory"])
11206
        else:
11207
          # Assume instance not running
11208
          # (there is a slight race condition here, but it's not very probable,
11209
          # and we have no other way to check)
11210
          current_mem = 0
11211
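        # Memory shortfall on the primary node: the requested memory, minus
        # what the instance currently uses, must fit in the node's free memory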
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11212
                    pninfo.payload["memory_free"])
11213
        if miss_mem > 0:
11214
          raise errors.OpPrereqError("This change will prevent the instance"
11215
                                     " from starting, due to %d MB of memory"
11216
                                     " missing on its primary node" % miss_mem,
11217
                                     errors.ECODE_NORES)
11218

    
11219
      if be_new[constants.BE_AUTO_BALANCE]:
11220
        for node, nres in nodeinfo.items():
11221
          if node not in instance.secondary_nodes:
11222
            continue
11223
          nres.Raise("Can't get info from secondary node %s" % node,
11224
                     prereq=True, ecode=errors.ECODE_STATE)
11225
          if not isinstance(nres.payload.get("memory_free", None), int):
11226
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11227
                                       " memory information" % node,
11228
                                       errors.ECODE_STATE)
11229
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11230
            raise errors.OpPrereqError("This change will prevent the instance"
11231
                                       " from failover to its secondary node"
11232
                                       " %s, due to not enough memory" % node,
11233
                                       errors.ECODE_STATE)
11234

    
11235
    # NIC processing
11236
    self.nic_pnew = {}
11237
    self.nic_pinst = {}
11238
    for nic_op, nic_dict in self.op.nics:
11239
      if nic_op == constants.DDM_REMOVE:
11240
        if not instance.nics:
11241
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11242
                                     errors.ECODE_INVAL)
11243
        continue
11244
      if nic_op != constants.DDM_ADD:
11245
        # an existing nic
11246
        if not instance.nics:
11247
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11248
                                     " no NICs" % nic_op,
11249
                                     errors.ECODE_INVAL)
11250
        if nic_op < 0 or nic_op >= len(instance.nics):
11251
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11252
                                     " are 0 to %d" %
11253
                                     (nic_op, len(instance.nics) - 1),
11254
                                     errors.ECODE_INVAL)
11255
        old_nic_params = instance.nics[nic_op].nicparams
11256
        old_nic_ip = instance.nics[nic_op].ip
11257
      else:
11258
        old_nic_params = {}
11259
        old_nic_ip = None
11260

    
11261
      update_params_dict = dict([(key, nic_dict[key])
11262
                                 for key in constants.NICS_PARAMETERS
11263
                                 if key in nic_dict])
11264

    
11265
      if "bridge" in nic_dict:
11266
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11267

    
11268
      new_nic_params = _GetUpdatedParams(old_nic_params,
11269
                                         update_params_dict)
11270
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11271
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11272
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11273
      self.nic_pinst[nic_op] = new_nic_params
11274
      self.nic_pnew[nic_op] = new_filled_nic_params
11275
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11276

    
11277
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11278
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11279
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11280
        if msg:
11281
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11282
          if self.op.force:
11283
            self.warn.append(msg)
11284
          else:
11285
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11286
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11287
        if constants.INIC_IP in nic_dict:
11288
          nic_ip = nic_dict[constants.INIC_IP]
11289
        else:
11290
          nic_ip = old_nic_ip
11291
        if nic_ip is None:
11292
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11293
                                     " on a routed nic", errors.ECODE_INVAL)
11294
      if constants.INIC_MAC in nic_dict:
11295
        nic_mac = nic_dict[constants.INIC_MAC]
11296
        if nic_mac is None:
11297
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11298
                                     errors.ECODE_INVAL)
11299
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11300
          # otherwise generate the mac
11301
          nic_dict[constants.INIC_MAC] = \
11302
            self.cfg.GenerateMAC(self.proc.GetECId())
11303
        else:
11304
          # or validate/reserve the current one
11305
          try:
11306
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11307
          except errors.ReservationError:
11308
            raise errors.OpPrereqError("MAC address %s already in use"
11309
                                       " in cluster" % nic_mac,
11310
                                       errors.ECODE_NOTUNIQUE)
11311

    
11312
    # DISK processing
11313
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11314
      raise errors.OpPrereqError("Disk operations not supported for"
11315
                                 " diskless instances",
11316
                                 errors.ECODE_INVAL)
11317
    for disk_op, _ in self.op.disks:
11318
      if disk_op == constants.DDM_REMOVE:
11319
        if len(instance.disks) == 1:
11320
          raise errors.OpPrereqError("Cannot remove the last disk of"
11321
                                     " an instance", errors.ECODE_INVAL)
11322
        _CheckInstanceDown(self, instance, "cannot remove disks")
11323

    
11324
      if (disk_op == constants.DDM_ADD and
11325
          len(instance.disks) >= constants.MAX_DISKS):
11326
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11327
                                   " add more" % constants.MAX_DISKS,
11328
                                   errors.ECODE_STATE)
11329
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11330
        # an existing disk
11331
        if disk_op < 0 or disk_op >= len(instance.disks):
11332
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11333
                                     " are 0 to %d" %
11334
                                     (disk_op, len(instance.disks) - 1),
11335
                                     errors.ECODE_INVAL)
11336

    
11337
    return
11338

    
11339
  def _ConvertPlainToDrbd(self, feedback_fn):
11340
    """Converts an instance from plain to drbd.
11341

11342
    """
11343
    feedback_fn("Converting template to drbd")
11344
    instance = self.instance
11345
    pnode = instance.primary_node
11346
    snode = self.op.remote_node
11347

    
11348
    # create a fake disk info for _GenerateDiskTemplate
11349
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11350
                  constants.IDISK_VG: d.logical_id[0]}
11351
                 for d in instance.disks]
11352
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11353
                                      instance.name, pnode, [snode],
11354
                                      disk_info, None, None, 0, feedback_fn)
11355
    info = _GetInstanceInfoText(instance)
11356
    feedback_fn("Creating aditional volumes...")
11357
    # first, create the missing data and meta devices
11358
    for disk in new_disks:
11359
      # unfortunately this is... not too nice
11360
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11361
                            info, True)
11362
      for child in disk.children:
11363
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11364
    # at this stage, all new LVs have been created, we can rename the
11365
    # old ones
11366
    feedback_fn("Renaming original volumes...")
11367
    rename_list = [(o, n.children[0].logical_id)
11368
                   for (o, n) in zip(instance.disks, new_disks)]
11369
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11370
    result.Raise("Failed to rename original LVs")
11371

    
11372
    feedback_fn("Initializing DRBD devices...")
11373
    # all child devices are in place, we can now create the DRBD devices
11374
    for disk in new_disks:
11375
      for node in [pnode, snode]:
11376
        f_create = node == pnode
11377
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11378

    
11379
    # at this point, the instance has been modified
11380
    instance.disk_template = constants.DT_DRBD8
11381
    instance.disks = new_disks
11382
    self.cfg.Update(instance, feedback_fn)
11383

    
11384
    # disks are created, waiting for sync
11385
    disk_abort = not _WaitForSync(self, instance,
11386
                                  oneshot=not self.op.wait_for_sync)
11387
    if disk_abort:
11388
      raise errors.OpExecError("There are some degraded disks for"
11389
                               " this instance, please cleanup manually")
11390

    
11391
  def _ConvertDrbdToPlain(self, feedback_fn):
11392
    """Converts an instance from drbd to plain.
11393

11394
    """
11395
    instance = self.instance
11396
    assert len(instance.secondary_nodes) == 1
11397
    pnode = instance.primary_node
11398
    snode = instance.secondary_nodes[0]
11399
    feedback_fn("Converting template to plain")
11400

    
11401
    old_disks = instance.disks
11402
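    # each DRBD8 disk has two children: the data LV (index 0), which is kept
    # as the new plain disk, and the metadata LV (index 1), removed below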
    new_disks = [d.children[0] for d in old_disks]
11403

    
11404
    # copy over size and mode
11405
    for parent, child in zip(old_disks, new_disks):
11406
      child.size = parent.size
11407
      child.mode = parent.mode
11408

    
11409
    # update instance structure
11410
    instance.disks = new_disks
11411
    instance.disk_template = constants.DT_PLAIN
11412
    self.cfg.Update(instance, feedback_fn)
11413

    
11414
    feedback_fn("Removing volumes on the secondary node...")
11415
    for disk in old_disks:
11416
      self.cfg.SetDiskID(disk, snode)
11417
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11418
      if msg:
11419
        self.LogWarning("Could not remove block device %s on node %s,"
11420
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11421

    
11422
    feedback_fn("Removing unneeded volumes on the primary node...")
11423
    for idx, disk in enumerate(old_disks):
11424
      meta = disk.children[1]
11425
      self.cfg.SetDiskID(meta, pnode)
11426
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11427
      if msg:
11428
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11429
                        " continuing anyway: %s", idx, pnode, msg)
11430

    
11431
  def Exec(self, feedback_fn):
11432
    """Modifies an instance.
11433

11434
    All parameters take effect only at the next restart of the instance.
11435

11436
    """
11437
    # Process here the warnings from CheckPrereq, as we don't have a
11438
    # feedback_fn there.
11439
    for warn in self.warn:
11440
      feedback_fn("WARNING: %s" % warn)
11441

    
11442
    result = []
11443
    instance = self.instance
11444
    # disk changes
11445
    for disk_op, disk_dict in self.op.disks:
11446
      if disk_op == constants.DDM_REMOVE:
11447
        # remove the last disk
11448
        device = instance.disks.pop()
11449
        device_idx = len(instance.disks)
11450
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11451
          self.cfg.SetDiskID(disk, node)
11452
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11453
          if msg:
11454
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11455
                            " continuing anyway", device_idx, node, msg)
11456
        result.append(("disk/%d" % device_idx, "remove"))
11457
      elif disk_op == constants.DDM_ADD:
11458
        # add a new disk
11459
        if instance.disk_template in (constants.DT_FILE,
11460
                                        constants.DT_SHARED_FILE):
11461
          file_driver, file_path = instance.disks[0].logical_id
11462
          file_path = os.path.dirname(file_path)
11463
        else:
11464
          file_driver = file_path = None
11465
        disk_idx_base = len(instance.disks)
11466
        new_disk = _GenerateDiskTemplate(self,
11467
                                         instance.disk_template,
11468
                                         instance.name, instance.primary_node,
11469
                                         instance.secondary_nodes,
11470
                                         [disk_dict],
11471
                                         file_path,
11472
                                         file_driver,
11473
                                         disk_idx_base, feedback_fn)[0]
11474
        instance.disks.append(new_disk)
11475
        info = _GetInstanceInfoText(instance)
11476

    
11477
        logging.info("Creating volume %s for instance %s",
11478
                     new_disk.iv_name, instance.name)
11479
        # Note: this needs to be kept in sync with _CreateDisks
11480
        #HARDCODE
11481
        for node in instance.all_nodes:
11482
          f_create = node == instance.primary_node
11483
          try:
11484
            _CreateBlockDev(self, node, instance, new_disk,
11485
                            f_create, info, f_create)
11486
          except errors.OpExecError, err:
11487
            self.LogWarning("Failed to create volume %s (%s) on"
11488
                            " node %s: %s",
11489
                            new_disk.iv_name, new_disk, node, err)
11490
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11491
                       (new_disk.size, new_disk.mode)))
11492
      else:
11493
        # change a given disk
11494
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11495
        result.append(("disk.mode/%d" % disk_op,
11496
                       disk_dict[constants.IDISK_MODE]))
11497

    
11498
    if self.op.disk_template:
11499
      r_shut = _ShutdownInstanceDisks(self, instance)
11500
      if not r_shut:
11501
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11502
                                 " proceed with disk template conversion")
11503
      mode = (instance.disk_template, self.op.disk_template)
11504
      try:
11505
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11506
      except:
11507
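        # the conversion failed; release any DRBD minors reserved for the new
        # disks before re-raising the error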
        self.cfg.ReleaseDRBDMinors(instance.name)
11508
        raise
11509
      result.append(("disk_template", self.op.disk_template))
11510

    
11511
    # NIC changes
11512
    for nic_op, nic_dict in self.op.nics:
11513
      if nic_op == constants.DDM_REMOVE:
11514
        # remove the last nic
11515
        del instance.nics[-1]
11516
        result.append(("nic.%d" % len(instance.nics), "remove"))
11517
      elif nic_op == constants.DDM_ADD:
11518
        # mac and bridge should be set by now
11519
        mac = nic_dict[constants.INIC_MAC]
11520
        ip = nic_dict.get(constants.INIC_IP, None)
11521
        nicparams = self.nic_pinst[constants.DDM_ADD]
11522
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11523
        instance.nics.append(new_nic)
11524
        result.append(("nic.%d" % (len(instance.nics) - 1),
11525
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11526
                       (new_nic.mac, new_nic.ip,
11527
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11528
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11529
                       )))
11530
      else:
11531
        for key in (constants.INIC_MAC, constants.INIC_IP):
11532
          if key in nic_dict:
11533
            setattr(instance.nics[nic_op], key, nic_dict[key])
11534
        if nic_op in self.nic_pinst:
11535
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11536
        for key, val in nic_dict.iteritems():
11537
          result.append(("nic.%s/%d" % (key, nic_op), val))
11538

    
11539
    # hvparams changes
11540
    if self.op.hvparams:
11541
      instance.hvparams = self.hv_inst
11542
      for key, val in self.op.hvparams.iteritems():
11543
        result.append(("hv/%s" % key, val))
11544

    
11545
    # beparams changes
11546
    if self.op.beparams:
11547
      instance.beparams = self.be_inst
11548
      for key, val in self.op.beparams.iteritems():
11549
        result.append(("be/%s" % key, val))
11550

    
11551
    # OS change
11552
    if self.op.os_name:
11553
      instance.os = self.op.os_name
11554

    
11555
    # osparams changes
11556
    if self.op.osparams:
11557
      instance.osparams = self.os_inst
11558
      for key, val in self.op.osparams.iteritems():
11559
        result.append(("os/%s" % key, val))
11560

    
11561
    self.cfg.Update(instance, feedback_fn)
11562

    
11563
    return result
11564

    
11565
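  # Supported disk template conversions: maps (current template, requested
  # template) to the method performing the conversion; checked in CheckPrereq
  # and dispatched from Exec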
  _DISK_CONVERSIONS = {
11566
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11567
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11568
    }
11569

    
11570

    
11571
class LUInstanceChangeGroup(LogicalUnit):
11572
  HPATH = "instance-change-group"
11573
  HTYPE = constants.HTYPE_INSTANCE
11574
  REQ_BGL = False
11575

    
11576
  def ExpandNames(self):
11577
    self.share_locks = _ShareAll()
11578
    self.needed_locks = {
11579
      locking.LEVEL_NODEGROUP: [],
11580
      locking.LEVEL_NODE: [],
11581
      }
11582

    
11583
    self._ExpandAndLockInstance()
11584

    
11585
    if self.op.target_groups:
11586
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11587
                                  self.op.target_groups)
11588
    else:
11589
      self.req_target_uuids = None
11590

    
11591
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11592

    
11593
  def DeclareLocks(self, level):
11594
    if level == locking.LEVEL_NODEGROUP:
11595
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11596

    
11597
      if self.req_target_uuids:
11598
        lock_groups = set(self.req_target_uuids)
11599

    
11600
        # Lock all groups used by instance optimistically; this requires going
11601
        # via the node before it's locked, requiring verification later on
11602
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11603
        lock_groups.update(instance_groups)
11604
      else:
11605
        # No target groups, need to lock all of them
11606
        lock_groups = locking.ALL_SET
11607

    
11608
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11609

    
11610
    elif level == locking.LEVEL_NODE:
11611
      if self.req_target_uuids:
11612
        # Lock all nodes used by instances
11613
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11614
        self._LockInstancesNodes()
11615

    
11616
        # Lock all nodes in all potential target groups
11617
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11618
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11619
        member_nodes = [node_name
11620
                        for group in lock_groups
11621
                        for node_name in self.cfg.GetNodeGroup(group).members]
11622
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11623
      else:
11624
        # Lock all nodes as all groups are potential targets
11625
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11626

    
11627
  def CheckPrereq(self):
11628
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11629
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11630
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11631

    
11632
    assert (self.req_target_uuids is None or
11633
            owned_groups.issuperset(self.req_target_uuids))
11634
    assert owned_instances == set([self.op.instance_name])
11635

    
11636
    # Get instance information
11637
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11638

    
11639
    # Check if node groups for locked instance are still correct
11640
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11641
      ("Instance %s's nodes changed while we kept the lock" %
11642
       self.op.instance_name)
11643

    
11644
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11645
                                           owned_groups)
11646

    
11647
    if self.req_target_uuids:
11648
      # User requested specific target groups
11649
      self.target_uuids = self.req_target_uuids
11650
    else:
11651
      # All groups except those used by the instance are potential targets
11652
      self.target_uuids = owned_groups - inst_groups
11653

    
11654
    conflicting_groups = self.target_uuids & inst_groups
11655
    if conflicting_groups:
11656
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11657
                                 " used by the instance '%s'" %
11658
                                 (utils.CommaJoin(conflicting_groups),
11659
                                  self.op.instance_name),
11660
                                 errors.ECODE_INVAL)
11661

    
11662
    if not self.target_uuids:
11663
      raise errors.OpPrereqError("There are no possible target groups",
11664
                                 errors.ECODE_INVAL)
11665

    
11666
  def BuildHooksEnv(self):
11667
    """Build hooks env.
11668

11669
    """
11670
    assert self.target_uuids
11671

    
11672
    env = {
11673
      "TARGET_GROUPS": " ".join(self.target_uuids),
11674
      }
11675

    
11676
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11677

    
11678
    return env
11679

    
11680
  def BuildHooksNodes(self):
11681
    """Build hooks nodes.
11682

11683
    """
11684
    mn = self.cfg.GetMasterNode()
11685
    return ([mn], [mn])
11686

    
11687
  def Exec(self, feedback_fn):
11688
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11689

    
11690
    assert instances == [self.op.instance_name], "Instance not locked"
11691

    
11692
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11693
                     instances=instances, target_groups=list(self.target_uuids))
11694

    
11695
    ial.Run(self.op.iallocator)
11696

    
11697
    if not ial.success:
11698
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11699
                                 " instance '%s' using iallocator '%s': %s" %
11700
                                 (self.op.instance_name, self.op.iallocator,
11701
                                  ial.info),
11702
                                 errors.ECODE_NORES)
11703

    
11704
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11705

    
11706
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11707
                 " instance '%s'", len(jobs), self.op.instance_name)
11708

    
11709
    return ResultWithJobs(jobs)
11710

    
11711

    
11712
class LUBackupQuery(NoHooksLU):
11713
  """Query the exports list
11714

11715
  """
11716
  REQ_BGL = False
11717

    
11718
  def ExpandNames(self):
11719
    self.needed_locks = {}
11720
    self.share_locks[locking.LEVEL_NODE] = 1
11721
    if not self.op.nodes:
11722
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11723
    else:
11724
      self.needed_locks[locking.LEVEL_NODE] = \
11725
        _GetWantedNodes(self, self.op.nodes)
11726

    
11727
  def Exec(self, feedback_fn):
11728
    """Compute the list of all the exported system images.
11729

11730
    @rtype: dict
11731
    @return: a dictionary with the structure node->(export-list)
11732
        where export-list is a list of the instances exported on
11733
        that node.
11734

11735
    """
11736
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11737
    rpcresult = self.rpc.call_export_list(self.nodes)
11738
    result = {}
11739
    for node in rpcresult:
11740
      if rpcresult[node].fail_msg:
11741
        result[node] = False
11742
      else:
11743
        result[node] = rpcresult[node].payload
11744

    
11745
    return result
11746

    
11747

    
11748
class LUBackupPrepare(NoHooksLU):
11749
  """Prepares an instance for an export and returns useful information.
11750

11751
  """
11752
  REQ_BGL = False
11753

    
11754
  def ExpandNames(self):
11755
    self._ExpandAndLockInstance()
11756

    
11757
  def CheckPrereq(self):
11758
    """Check prerequisites.
11759

11760
    """
11761
    instance_name = self.op.instance_name
11762

    
11763
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11764
    assert self.instance is not None, \
11765
          "Cannot retrieve locked instance %s" % self.op.instance_name
11766
    _CheckNodeOnline(self, self.instance.primary_node)
11767

    
11768
    self._cds = _GetClusterDomainSecret()
11769

    
11770
  def Exec(self, feedback_fn):
11771
    """Prepares an instance for an export.
11772

11773
    """
11774
    instance = self.instance
11775

    
11776
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11777
      salt = utils.GenerateSecret(8)
11778

    
11779
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11780
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11781
                                              constants.RIE_CERT_VALIDITY)
11782
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11783

    
11784
      (name, cert_pem) = result.payload
11785

    
11786
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11787
                                             cert_pem)
11788

    
11789
      return {
11790
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11791
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11792
                          salt),
11793
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11794
        }
11795

    
11796
    return None
11797

    
11798

    
11799
class LUBackupExport(LogicalUnit):
11800
  """Export an instance to an image in the cluster.
11801

11802
  """
11803
  HPATH = "instance-export"
11804
  HTYPE = constants.HTYPE_INSTANCE
11805
  REQ_BGL = False
11806

    
11807
  def CheckArguments(self):
11808
    """Check the arguments.
11809

11810
    """
11811
    self.x509_key_name = self.op.x509_key_name
11812
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11813

    
11814
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11815
      if not self.x509_key_name:
11816
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11817
                                   errors.ECODE_INVAL)
11818

    
11819
      if not self.dest_x509_ca_pem:
11820
        raise errors.OpPrereqError("Missing destination X509 CA",
11821
                                   errors.ECODE_INVAL)
11822

    
11823
  def ExpandNames(self):
11824
    self._ExpandAndLockInstance()
11825

    
11826
    # Lock all nodes for local exports
11827
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11828
      # FIXME: lock only instance primary and destination node
11829
      #
11830
      # Sad but true, for now we have to lock all nodes, as we don't know where
11831
      # the previous export might be, and in this LU we search for it and
11832
      # remove it from its current node. In the future we could fix this by:
11833
      #  - making a tasklet to search (share-lock all), then create the
11834
      #    new one, then one to remove, after
11835
      #  - removing the removal operation altogether
11836
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11837

    
11838
  def DeclareLocks(self, level):
11839
    """Last minute lock declaration."""
11840
    # All nodes are locked anyway, so nothing to do here.
11841

    
11842
  def BuildHooksEnv(self):
11843
    """Build hooks env.
11844

11845
    This will run on the master, primary node and target node.
11846

11847
    """
11848
    env = {
11849
      "EXPORT_MODE": self.op.mode,
11850
      "EXPORT_NODE": self.op.target_node,
11851
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11852
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11853
      # TODO: Generic function for boolean env variables
11854
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11855
      }
11856

    
11857
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11858

    
11859
    return env
11860

    
11861
  def BuildHooksNodes(self):
11862
    """Build hooks nodes.
11863

11864
    """
11865
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11866

    
11867
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11868
      nl.append(self.op.target_node)
11869

    
11870
    return (nl, nl)
11871

    
11872
  def CheckPrereq(self):
11873
    """Check prerequisites.
11874

11875
    This checks that the instance and node names are valid.
11876

11877
    """
11878
    instance_name = self.op.instance_name
11879

    
11880
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11881
    assert self.instance is not None, \
11882
          "Cannot retrieve locked instance %s" % self.op.instance_name
11883
    _CheckNodeOnline(self, self.instance.primary_node)
11884

    
11885
    if (self.op.remove_instance and self.instance.admin_up and
11886
        not self.op.shutdown):
11887
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
11888
                                 " down before")
11889

    
11890
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11891
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11892
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11893
      assert self.dst_node is not None
11894

    
11895
      _CheckNodeOnline(self, self.dst_node.name)
11896
      _CheckNodeNotDrained(self, self.dst_node.name)
11897

    
11898
      self._cds = None
11899
      self.dest_disk_info = None
11900
      self.dest_x509_ca = None
11901

    
11902
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11903
      self.dst_node = None
11904

    
11905
      if len(self.op.target_node) != len(self.instance.disks):
11906
        raise errors.OpPrereqError(("Received destination information for %s"
11907
                                    " disks, but instance %s has %s disks") %
11908
                                   (len(self.op.target_node), instance_name,
11909
                                    len(self.instance.disks)),
11910
                                   errors.ECODE_INVAL)
11911

    
11912
      cds = _GetClusterDomainSecret()
11913

    
11914
      # Check X509 key name
11915
      try:
11916
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11917
      except (TypeError, ValueError), err:
11918
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11919

    
11920
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11921
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11922
                                   errors.ECODE_INVAL)
11923

    
11924
      # Load and verify CA
11925
      try:
11926
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11927
      except OpenSSL.crypto.Error, err:
11928
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11929
                                   (err, ), errors.ECODE_INVAL)
11930

    
11931
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11932
      if errcode is not None:
11933
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11934
                                   (msg, ), errors.ECODE_INVAL)
11935

    
11936
      self.dest_x509_ca = cert
11937

    
11938
      # Verify target information
11939
      disk_info = []
11940
      for idx, disk_data in enumerate(self.op.target_node):
11941
        try:
11942
          (host, port, magic) = \
11943
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11944
        except errors.GenericError, err:
11945
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11946
                                     (idx, err), errors.ECODE_INVAL)
11947

    
11948
        disk_info.append((host, port, magic))
11949

    
11950
      assert len(disk_info) == len(self.op.target_node)
11951
      self.dest_disk_info = disk_info
11952

    
11953
    else:
11954
      raise errors.ProgrammerError("Unhandled export mode %r" %
11955
                                   self.op.mode)
11956

    
11957
    # instance disk type verification
11958
    # TODO: Implement export support for file-based disks
11959
    for disk in self.instance.disks:
11960
      if disk.dev_type == constants.LD_FILE:
11961
        raise errors.OpPrereqError("Export not supported for instances with"
11962
                                   " file-based disks", errors.ECODE_INVAL)
11963

    
11964
  def _CleanupExports(self, feedback_fn):
11965
    """Removes exports of current instance from all other nodes.
11966

11967
    If an instance in a cluster with nodes A..D was exported to node C, its
11968
    exports will be removed from the nodes A, B and D.
11969

11970
    """
11971
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11972

    
11973
    nodelist = self.cfg.GetNodeList()
11974
    nodelist.remove(self.dst_node.name)
11975

    
11976
    # on one-node clusters nodelist will be empty after the removal
11977
    # if we proceed the backup would be removed because OpBackupQuery
11978
    # substitutes an empty list with the full cluster node list.
11979
    iname = self.instance.name
11980
    if nodelist:
11981
      feedback_fn("Removing old exports for instance %s" % iname)
11982
      exportlist = self.rpc.call_export_list(nodelist)
11983
      for node in exportlist:
11984
        if exportlist[node].fail_msg:
11985
          continue
11986
        if iname in exportlist[node].payload:
11987
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11988
          if msg:
11989
            self.LogWarning("Could not remove older export for instance %s"
11990
                            " on node %s: %s", iname, node, msg)
11991

    
11992
  def Exec(self, feedback_fn):
11993
    """Export an instance to an image in the cluster.
11994

11995
    """
11996
    assert self.op.mode in constants.EXPORT_MODES
11997

    
11998
    instance = self.instance
11999
    src_node = instance.primary_node
12000

    
12001
    if self.op.shutdown:
12002
      # shutdown the instance, but not the disks
12003
      feedback_fn("Shutting down instance %s" % instance.name)
12004
      result = self.rpc.call_instance_shutdown(src_node, instance,
12005
                                               self.op.shutdown_timeout)
12006
      # TODO: Maybe ignore failures if ignore_remove_failures is set
12007
      result.Raise("Could not shutdown instance %s on"
12008
                   " node %s" % (instance.name, src_node))
12009

    
12010
    # set the disks ID correctly since call_instance_start needs the
12011
    # correct drbd minor to create the symlinks
12012
    for disk in instance.disks:
12013
      self.cfg.SetDiskID(disk, src_node)
12014

    
12015
    activate_disks = (not instance.admin_up)
12016

    
12017
    if activate_disks:
12018
      # Activate the instance disks if we're exporting a stopped instance
12019
      feedback_fn("Activating disks for %s" % instance.name)
12020
      _StartInstanceDisks(self, instance, None)
12021

    
12022
    try:
12023
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12024
                                                     instance)
12025

    
12026
      helper.CreateSnapshots()
12027
      try:
12028
        if (self.op.shutdown and instance.admin_up and
12029
            not self.op.remove_instance):
12030
          assert not activate_disks
12031
          feedback_fn("Starting instance %s" % instance.name)
12032
          result = self.rpc.call_instance_start(src_node,
12033
                                                (instance, None, None), False)
12034
          msg = result.fail_msg
12035
          if msg:
12036
            feedback_fn("Failed to start instance: %s" % msg)
12037
            _ShutdownInstanceDisks(self, instance)
12038
            raise errors.OpExecError("Could not start instance: %s" % msg)
12039

    
12040
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
12041
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12042
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12043
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
12044
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12045

    
12046
          (key_name, _, _) = self.x509_key_name
12047

    
12048
          dest_ca_pem = \
12049
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12050
                                            self.dest_x509_ca)
12051

    
12052
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12053
                                                     key_name, dest_ca_pem,
12054
                                                     timeouts)
12055
      finally:
12056
        helper.Cleanup()
12057

    
12058
      # Check for backwards compatibility
12059
      assert len(dresults) == len(instance.disks)
12060
      assert compat.all(isinstance(i, bool) for i in dresults), \
12061
             "Not all results are boolean: %r" % dresults
12062

    
12063
    finally:
12064
      if activate_disks:
12065
        feedback_fn("Deactivating disks for %s" % instance.name)
12066
        _ShutdownInstanceDisks(self, instance)
12067

    
12068
    if not (compat.all(dresults) and fin_resu):
12069
      failures = []
12070
      if not fin_resu:
12071
        failures.append("export finalization")
12072
      if not compat.all(dresults):
12073
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12074
                               if not dsk)
12075
        failures.append("disk export: disk(s) %s" % fdsk)
12076

    
12077
      raise errors.OpExecError("Export failed, errors in %s" %
12078
                               utils.CommaJoin(failures))
12079

    
12080
    # At this point, the export was successful, we can cleanup/finish
12081

    
12082
    # Remove instance if requested
12083
    if self.op.remove_instance:
12084
      feedback_fn("Removing instance %s" % instance.name)
12085
      _RemoveInstance(self, feedback_fn, instance,
12086
                      self.op.ignore_remove_failures)
12087

    
12088
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12089
      self._CleanupExports(feedback_fn)
12090

    
12091
    return fin_resu, dresults
12092

    
12093

    
12094
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")

    
12140

    
12141
class LUGroupAdd(LogicalUnit):
12142
  """Logical unit for creating node groups.
12143

12144
  """
12145
  HPATH = "group-add"
12146
  HTYPE = constants.HTYPE_GROUP
12147
  REQ_BGL = False
12148

    
12149
  def ExpandNames(self):
12150
    # We need the new group's UUID here so that we can create and acquire the
12151
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12152
    # that it should not check whether the UUID exists in the configuration.
12153
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12154
    self.needed_locks = {}
12155
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12156

    
12157
  def CheckPrereq(self):
12158
    """Check prerequisites.
12159

12160
    This checks that the given group name is not an existing node group
12161
    already.
12162

12163
    """
12164
    try:
12165
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12166
    except errors.OpPrereqError:
12167
      pass
12168
    else:
12169
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12170
                                 " node group (UUID: %s)" %
12171
                                 (self.op.group_name, existing_uuid),
12172
                                 errors.ECODE_EXISTS)
12173

    
12174
    if self.op.ndparams:
12175
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12176

    
12177
  def BuildHooksEnv(self):
12178
    """Build hooks env.
12179

12180
    """
12181
    return {
12182
      "GROUP_NAME": self.op.group_name,
12183
      }
12184

    
12185
  def BuildHooksNodes(self):
12186
    """Build hooks nodes.
12187

12188
    """
12189
    mn = self.cfg.GetMasterNode()
12190
    return ([mn], [mn])
12191

    
12192
  def Exec(self, feedback_fn):
12193
    """Add the node group to the cluster.
12194

12195
    """
12196
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12197
                                  uuid=self.group_uuid,
12198
                                  alloc_policy=self.op.alloc_policy,
12199
                                  ndparams=self.op.ndparams)
12200

    
12201
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12202
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12203

    
12204

    
12205
class LUGroupAssignNodes(NoHooksLU):
12206
  """Logical unit for assigning nodes to groups.
12207

12208
  """
12209
  REQ_BGL = False
12210

    
12211
  def ExpandNames(self):
12212
    # These raise errors.OpPrereqError on their own:
12213
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12214
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12215

    
12216
    # We want to lock all the affected nodes and groups. We have readily
12217
    # available the list of nodes, and the *destination* group. To gather the
12218
    # list of "source" groups, we need to fetch node information later on.
12219
    self.needed_locks = {
12220
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12221
      locking.LEVEL_NODE: self.op.nodes,
12222
      }
12223

    
12224
  def DeclareLocks(self, level):
12225
    if level == locking.LEVEL_NODEGROUP:
12226
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12227

    
12228
      # Try to get all affected nodes' groups without having the group or node
12229
      # lock yet. Needs verification later in the code flow.
12230
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12231

    
12232
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12233

    
12234
  def CheckPrereq(self):
12235
    """Check prerequisites.
12236

12237
    """
12238
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12239
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12240
            frozenset(self.op.nodes))
12241

    
12242
    expected_locks = (set([self.group_uuid]) |
12243
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12244
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12245
    if actual_locks != expected_locks:
12246
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12247
                               " current groups are '%s', used to be '%s'" %
12248
                               (utils.CommaJoin(expected_locks),
12249
                                utils.CommaJoin(actual_locks)))
12250

    
12251
    self.node_data = self.cfg.GetAllNodesInfo()
12252
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12253
    instance_data = self.cfg.GetAllInstancesInfo()
12254

    
12255
    if self.group is None:
12256
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12257
                               (self.op.group_name, self.group_uuid))
12258

    
12259
    (new_splits, previous_splits) = \
12260
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12261
                                             for node in self.op.nodes],
12262
                                            self.node_data, instance_data)
12263

    
12264
    if new_splits:
12265
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12266

    
12267
      if not self.op.force:
12268
        raise errors.OpExecError("The following instances get split by this"
12269
                                 " change and --force was not given: %s" %
12270
                                 fmt_new_splits)
12271
      else:
12272
        self.LogWarning("This operation will split the following instances: %s",
12273
                        fmt_new_splits)
12274

    
12275
        if previous_splits:
12276
          self.LogWarning("In addition, these already-split instances continue"
12277
                          " to be split across groups: %s",
12278
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12279

    
12280
  def Exec(self, feedback_fn):
12281
    """Assign nodes to a new group.
12282

12283
    """
12284
    for node in self.op.nodes:
12285
      self.node_data[node].group = self.group_uuid
12286

    
12287
    # FIXME: Depends on side-effects of modifying the result of
12288
    # C{cfg.GetAllNodesInfo}
12289

    
12290
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12291

    
12292
  @staticmethod
12293
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12294
    """Check for split instances after a node assignment.
12295

12296
    This method considers a series of node assignments as an atomic operation,
12297
    and returns information about split instances after applying the set of
12298
    changes.
12299

12300
    In particular, it returns information about newly split instances, and
12301
    instances that were already split, and remain so after the change.
12302

12303
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12304
    considered.
12305

12306
    @type changes: list of (node_name, new_group_uuid) pairs.
12307
    @param changes: list of node assignments to consider.
12308
    @param node_data: a dict with data for all nodes
12309
    @param instance_data: a dict with all instances to consider
12310
    @rtype: a two-tuple
12311
    @return: a list of instances that were previously okay and end up split as
      a consequence of this change, and a list of instances that were
      previously split and this change does not fix.
12314

12315
    """
12316
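    # Only consider assignments that move a node to a different group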
    changed_nodes = dict((node, group) for node, group in changes
12317
                         if node_data[node].group != group)
12318

    
12319
    all_split_instances = set()
12320
    previously_split_instances = set()
12321

    
12322
    def InstanceNodes(instance):
12323
      return [instance.primary_node] + list(instance.secondary_nodes)
12324

    
12325
    for inst in instance_data.values():
12326
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12327
        continue
12328

    
12329
      instance_nodes = InstanceNodes(inst)
12330

    
12331
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12332
        previously_split_instances.add(inst.name)
12333

    
12334
      if len(set(changed_nodes.get(node, node_data[node].group)
12335
                 for node in instance_nodes)) > 1:
12336
        all_split_instances.add(inst.name)
12337

    
12338
    return (list(all_split_instances - previously_split_instances),
12339
            list(previously_split_instances & all_split_instances))
12340

    
12341

    
12342
class _GroupQuery(_QueryBase):
12343
  FIELDS = query.GROUP_FIELDS
12344

    
12345
  def ExpandNames(self, lu):
12346
    lu.needed_locks = {}
12347

    
12348
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12349
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12350

    
12351
    if not self.names:
12352
      self.wanted = [name_to_uuid[name]
12353
                     for name in utils.NiceSort(name_to_uuid.keys())]
12354
    else:
12355
      # Accept names to be either names or UUIDs.
12356
      missing = []
12357
      self.wanted = []
12358
      all_uuid = frozenset(self._all_groups.keys())
12359

    
12360
      for name in self.names:
12361
        if name in all_uuid:
12362
          self.wanted.append(name)
12363
        elif name in name_to_uuid:
12364
          self.wanted.append(name_to_uuid[name])
12365
        else:
12366
          missing.append(name)
12367

    
12368
      if missing:
12369
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12370
                                   utils.CommaJoin(missing),
12371
                                   errors.ECODE_NOENT)
12372

    
12373
  def DeclareLocks(self, lu, level):
12374
    pass
12375

    
12376
  def _GetQueryData(self, lu):
12377
    """Computes the list of node groups and their attributes.
12378

12379
    """
12380
    do_nodes = query.GQ_NODE in self.requested_data
12381
    do_instances = query.GQ_INST in self.requested_data
12382

    
12383
    group_to_nodes = None
12384
    group_to_instances = None
12385

    
12386
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12387
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12388
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12389
    # instance->node. Hence, we will need to process nodes even if we only need
12390
    # instance information.
12391
    if do_nodes or do_instances:
12392
      all_nodes = lu.cfg.GetAllNodesInfo()
12393
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12394
      node_to_group = {}
12395

    
12396
      for node in all_nodes.values():
12397
        if node.group in group_to_nodes:
12398
          group_to_nodes[node.group].append(node.name)
12399
          node_to_group[node.name] = node.group
12400

    
12401
      if do_instances:
12402
        all_instances = lu.cfg.GetAllInstancesInfo()
12403
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12404

    
12405
        for instance in all_instances.values():
12406
          node = instance.primary_node
12407
          if node in node_to_group:
12408
            group_to_instances[node_to_group[node]].append(instance.name)
12409

    
12410
        if not do_nodes:
12411
          # Do not pass on node information if it was not requested.
12412
          group_to_nodes = None
12413

    
12414
    return query.GroupQueryData([self._all_groups[uuid]
12415
                                 for uuid in self.wanted],
12416
                                group_to_nodes, group_to_instances)
12417

    
12418

    
12419
class LUGroupQuery(NoHooksLU):
12420
  """Logical unit for querying node groups.
12421

12422
  """
12423
  REQ_BGL = False
12424

    
12425
  def CheckArguments(self):
12426
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12427
                          self.op.output_fields, False)
12428

    
12429
  def ExpandNames(self):
12430
    self.gq.ExpandNames(self)
12431

    
12432
  def DeclareLocks(self, level):
12433
    self.gq.DeclareLocks(self, level)
12434

    
12435
  def Exec(self, feedback_fn):
12436
    return self.gq.OldStyleQuery(self)
12437

    
12438

    
12439
class LUGroupSetParams(LogicalUnit):
12440
  """Modifies the parameters of a node group.
12441

12442
  """
12443
  HPATH = "group-modify"
12444
  HTYPE = constants.HTYPE_GROUP
12445
  REQ_BGL = False
12446

    
12447
  def CheckArguments(self):
12448
    all_changes = [
12449
      self.op.ndparams,
12450
      self.op.alloc_policy,
12451
      ]
12452

    
12453
    if all_changes.count(None) == len(all_changes):
12454
      raise errors.OpPrereqError("Please pass at least one modification",
12455
                                 errors.ECODE_INVAL)
12456

    
12457
  def ExpandNames(self):
12458
    # This raises errors.OpPrereqError on its own:
12459
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12460

    
12461
    self.needed_locks = {
12462
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12463
      }
12464

    
12465
  def CheckPrereq(self):
12466
    """Check prerequisites.
12467

12468
    """
12469
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12470

    
12471
    if self.group is None:
12472
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12473
                               (self.op.group_name, self.group_uuid))
12474

    
12475
    if self.op.ndparams:
12476
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12477
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12478
      self.new_ndparams = new_ndparams
12479

    
12480
  def BuildHooksEnv(self):
12481
    """Build hooks env.
12482

12483
    """
12484
    return {
12485
      "GROUP_NAME": self.op.group_name,
12486
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12487
      }
12488

    
12489
  def BuildHooksNodes(self):
12490
    """Build hooks nodes.
12491

12492
    """
12493
    mn = self.cfg.GetMasterNode()
12494
    return ([mn], [mn])
12495

    
12496
  def Exec(self, feedback_fn):
12497
    """Modifies the node group.
12498

12499
    """
12500
    result = []
12501

    
12502
    if self.op.ndparams:
12503
      self.group.ndparams = self.new_ndparams
12504
      result.append(("ndparams", str(self.group.ndparams)))
12505

    
12506
    if self.op.alloc_policy:
12507
      self.group.alloc_policy = self.op.alloc_policy
12508

    
12509
    self.cfg.Update(self.group, feedback_fn)
12510
    return result
12511

    
12512

    
12513
class LUGroupRemove(LogicalUnit):
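  """Logical unit for removing a node group.

  """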
12514
  HPATH = "group-remove"
12515
  HTYPE = constants.HTYPE_GROUP
12516
  REQ_BGL = False
12517

    
12518
  def ExpandNames(self):
12519
    # This raises errors.OpPrereqError on its own:
12520
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12521
    self.needed_locks = {
12522
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12523
      }
12524

    
12525
  def CheckPrereq(self):
12526
    """Check prerequisites.
12527

12528
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
12531

12532
    """
12533
    # Verify that the group is empty.
12534
    group_nodes = [node.name
12535
                   for node in self.cfg.GetAllNodesInfo().values()
12536
                   if node.group == self.group_uuid]
12537

    
12538
    if group_nodes:
12539
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12540
                                 " nodes: %s" %
12541
                                 (self.op.group_name,
12542
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12543
                                 errors.ECODE_STATE)
12544

    
12545
    # Verify the cluster would not be left group-less.
12546
    if len(self.cfg.GetNodeGroupList()) == 1:
12547
      raise errors.OpPrereqError("Group '%s' is the only group,"
12548
                                 " cannot be removed" %
12549
                                 self.op.group_name,
12550
                                 errors.ECODE_STATE)
12551

    
12552
  def BuildHooksEnv(self):
12553
    """Build hooks env.
12554

12555
    """
12556
    return {
12557
      "GROUP_NAME": self.op.group_name,
12558
      }
12559

    
12560
  def BuildHooksNodes(self):
12561
    """Build hooks nodes.
12562

12563
    """
12564
    mn = self.cfg.GetMasterNode()
12565
    return ([mn], [mn])
12566

    
12567
  def Exec(self, feedback_fn):
12568
    """Remove the node group.
12569

12570
    """
12571
    try:
12572
      self.cfg.RemoveNodeGroup(self.group_uuid)
12573
    except errors.ConfigurationError:
12574
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12575
                               (self.op.group_name, self.group_uuid))
12576

    
12577
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12578

    
12579

    
12580
class LUGroupRename(LogicalUnit):
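  """Logical unit for renaming a node group.

  """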
12581
  HPATH = "group-rename"
12582
  HTYPE = constants.HTYPE_GROUP
12583
  REQ_BGL = False
12584

    
12585
  def ExpandNames(self):
12586
    # This raises errors.OpPrereqError on its own:
12587
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12588

    
12589
    self.needed_locks = {
12590
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12591
      }
12592

    
12593
  def CheckPrereq(self):
12594
    """Check prerequisites.
12595

12596
    Ensures requested new name is not yet used.
12597

12598
    """
12599
    try:
12600
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12601
    except errors.OpPrereqError:
12602
      pass
12603
    else:
12604
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12605
                                 " node group (UUID: %s)" %
12606
                                 (self.op.new_name, new_name_uuid),
12607
                                 errors.ECODE_EXISTS)
12608

    
12609
  def BuildHooksEnv(self):
12610
    """Build hooks env.
12611

12612
    """
12613
    return {
12614
      "OLD_NAME": self.op.group_name,
12615
      "NEW_NAME": self.op.new_name,
12616
      }
12617

    
12618
  def BuildHooksNodes(self):
12619
    """Build hooks nodes.
12620

12621
    """
12622
    mn = self.cfg.GetMasterNode()
12623

    
12624
    all_nodes = self.cfg.GetAllNodesInfo()
12625
    all_nodes.pop(mn, None)
12626

    
12627
    run_nodes = [mn]
12628
    run_nodes.extend(node.name for node in all_nodes.values()
12629
                     if node.group == self.group_uuid)
12630

    
12631
    return (run_nodes, run_nodes)
12632

    
12633
  def Exec(self, feedback_fn):
12634
    """Rename the node group.
12635

12636
    """
12637
    group = self.cfg.GetNodeGroup(self.group_uuid)
12638

    
12639
    if group is None:
12640
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12641
                               (self.op.group_name, self.group_uuid))
12642

    
12643
    group.name = self.op.new_name
12644
    self.cfg.Update(group, feedback_fn)
12645

    
12646
    return self.op.new_name
12647

    
12648

    
12649
class LUGroupEvacuate(LogicalUnit):
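  """Logical unit for evacuating a node group's instances to other groups.

  """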
12650
  HPATH = "group-evacuate"
12651
  HTYPE = constants.HTYPE_GROUP
12652
  REQ_BGL = False
12653

    
12654
  def ExpandNames(self):
12655
    # This raises errors.OpPrereqError on its own:
12656
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12657

    
12658
    if self.op.target_groups:
12659
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12660
                                  self.op.target_groups)
12661
    else:
12662
      self.req_target_uuids = []
12663

    
12664
    if self.group_uuid in self.req_target_uuids:
12665
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12666
                                 " as a target group (targets are %s)" %
12667
                                 (self.group_uuid,
12668
                                  utils.CommaJoin(self.req_target_uuids)),
12669
                                 errors.ECODE_INVAL)
12670

    
12671
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12672

    
12673
    self.share_locks = _ShareAll()
12674
    self.needed_locks = {
12675
      locking.LEVEL_INSTANCE: [],
12676
      locking.LEVEL_NODEGROUP: [],
12677
      locking.LEVEL_NODE: [],
12678
      }
12679

    
12680
  def DeclareLocks(self, level):
12681
    if level == locking.LEVEL_INSTANCE:
12682
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12683

    
12684
      # Lock instances optimistically, needs verification once node and group
12685
      # locks have been acquired
12686
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12687
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12688

    
12689
    elif level == locking.LEVEL_NODEGROUP:
12690
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12691

    
12692
      if self.req_target_uuids:
12693
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12694

    
12695
        # Lock all groups used by instances optimistically; this requires going
12696
        # via the node before it's locked, requiring verification later on
12697
        lock_groups.update(group_uuid
12698
                           for instance_name in
12699
                             self.owned_locks(locking.LEVEL_INSTANCE)
12700
                           for group_uuid in
12701
                             self.cfg.GetInstanceNodeGroups(instance_name))
12702
      else:
12703
        # No target groups, need to lock all of them
12704
        lock_groups = locking.ALL_SET
12705

    
12706
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12707

    
12708
    elif level == locking.LEVEL_NODE:
12709
      # This will only lock the nodes in the group to be evacuated which
12710
      # contain actual instances
12711
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12712
      self._LockInstancesNodes()
12713

    
12714
      # Lock all nodes in group to be evacuated and target groups
12715
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12716
      assert self.group_uuid in owned_groups
12717
      member_nodes = [node_name
12718
                      for group in owned_groups
12719
                      for node_name in self.cfg.GetNodeGroup(group).members]
12720
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12721

    
12722
  def CheckPrereq(self):
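    """Check prerequisites.

    Verifies that the optimistically acquired instance, group and node locks
    still match the current cluster configuration.

    """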
12723
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12724
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12725
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12726

    
12727
    assert owned_groups.issuperset(self.req_target_uuids)
12728
    assert self.group_uuid in owned_groups
12729

    
12730
    # Check if locked instances are still correct
12731
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12732

    
12733
    # Get instance information
12734
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12735

    
12736
    # Check if node groups for locked instances are still correct
12737
    for instance_name in owned_instances:
12738
      inst = self.instances[instance_name]
12739
      assert owned_nodes.issuperset(inst.all_nodes), \
12740
        "Instance %s's nodes changed while we kept the lock" % instance_name
12741

    
12742
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12743
                                             owned_groups)
12744

    
12745
      assert self.group_uuid in inst_groups, \
12746
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12747

    
12748
    if self.req_target_uuids:
12749
      # User requested specific target groups
12750
      self.target_uuids = self.req_target_uuids
12751
    else:
12752
      # All groups except the one to be evacuated are potential targets
12753
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12754
                           if group_uuid != self.group_uuid]
12755

    
12756
      if not self.target_uuids:
12757
        raise errors.OpPrereqError("There are no possible target groups",
12758
                                   errors.ECODE_INVAL)
12759

    
12760
  def BuildHooksEnv(self):
12761
    """Build hooks env.
12762

12763
    """
12764
    return {
12765
      "GROUP_NAME": self.op.group_name,
12766
      "TARGET_GROUPS": " ".join(self.target_uuids),
12767
      }
12768

    
12769
  def BuildHooksNodes(self):
12770
    """Build hooks nodes.
12771

12772
    """
12773
    mn = self.cfg.GetMasterNode()
12774

    
12775
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12776

    
12777
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12778

    
12779
    return (run_nodes, run_nodes)
12780

    
12781
  def Exec(self, feedback_fn):
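    """Compute the evacuation jobs via the iallocator and return them.

    """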
12782
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12783

    
12784
    assert self.group_uuid not in self.target_uuids
12785

    
12786
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12787
                     instances=instances, target_groups=self.target_uuids)
12788

    
12789
    ial.Run(self.op.iallocator)
12790

    
12791
    if not ial.success:
12792
      raise errors.OpPrereqError("Can't compute group evacuation using"
12793
                                 " iallocator '%s': %s" %
12794
                                 (self.op.iallocator, ial.info),
12795
                                 errors.ECODE_NORES)
12796

    
12797
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12798

    
12799
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12800
                 len(jobs), self.op.group_name)
12801

    
12802
    return ResultWithJobs(jobs)
12803

    
12804

    
12805
class TagsLU(NoHooksLU): # pylint: disable=W0223
12806
  """Generic tags LU.
12807

12808
  This is an abstract class which is the parent of all the other tags LUs.
12809

12810
  """
12811
  def ExpandNames(self):
12812
    self.group_uuid = None
12813
    self.needed_locks = {}
12814
    if self.op.kind == constants.TAG_NODE:
12815
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12816
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12817
    elif self.op.kind == constants.TAG_INSTANCE:
12818
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12819
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12820
    elif self.op.kind == constants.TAG_NODEGROUP:
12821
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12822

    
12823
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12824
    # not possible to acquire the BGL based on opcode parameters)
12825

    
12826
  def CheckPrereq(self):
12827
    """Check prerequisites.
12828

12829
    """
12830
    if self.op.kind == constants.TAG_CLUSTER:
12831
      self.target = self.cfg.GetClusterInfo()
12832
    elif self.op.kind == constants.TAG_NODE:
12833
      self.target = self.cfg.GetNodeInfo(self.op.name)
12834
    elif self.op.kind == constants.TAG_INSTANCE:
12835
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12836
    elif self.op.kind == constants.TAG_NODEGROUP:
12837
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12838
    else:
12839
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12840
                                 str(self.op.kind), errors.ECODE_INVAL)
12841

    
12842

    
12843
class LUTagsGet(TagsLU):
12844
  """Returns the tags of a given object.
12845

12846
  """
12847
  REQ_BGL = False
12848

    
12849
  def ExpandNames(self):
12850
    TagsLU.ExpandNames(self)
12851

    
12852
    # Share locks as this is only a read operation
12853
    self.share_locks = _ShareAll()
12854

    
12855
  def Exec(self, feedback_fn):
12856
    """Returns the tag list.
12857

12858
    """
12859
    return list(self.target.GetTags())
12860

    
12861

    
12862
class LUTagsSearch(NoHooksLU):
12863
  """Searches the tags for a given pattern.
12864

12865
  """
12866
  REQ_BGL = False
12867

    
12868
  def ExpandNames(self):
12869
    self.needed_locks = {}
12870

    
12871
  def CheckPrereq(self):
12872
    """Check prerequisites.
12873

12874
    This checks the pattern passed for validity by compiling it.
12875

12876
    """
12877
    try:
12878
      self.re = re.compile(self.op.pattern)
12879
    except re.error, err:
12880
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12881
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12882

    
12883
  def Exec(self, feedback_fn):
12884
    """Returns the tag list.
12885

12886
    """
12887
    cfg = self.cfg
12888
    tgts = [("/cluster", cfg.GetClusterInfo())]
12889
    ilist = cfg.GetAllInstancesInfo().values()
12890
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12891
    nlist = cfg.GetAllNodesInfo().values()
12892
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12893
    tgts.extend(("/nodegroup/%s" % n.name, n)
12894
                for n in cfg.GetAllNodeGroupsInfo().values())
12895
    results = []
12896
    for path, target in tgts:
12897
      for tag in target.GetTags():
12898
        if self.re.search(tag):
12899
          results.append((path, tag))
12900
    return results
12901

    
12902

    
12903
class LUTagsSet(TagsLU):
12904
  """Sets a tag on a given object.
12905

12906
  """
12907
  REQ_BGL = False
12908

    
12909
  def CheckPrereq(self):
12910
    """Check prerequisites.
12911

12912
    This checks the type and length of the tag name and value.
12913

12914
    """
12915
    TagsLU.CheckPrereq(self)
12916
    for tag in self.op.tags:
12917
      objects.TaggableObject.ValidateTag(tag)
12918

    
12919
  def Exec(self, feedback_fn):
12920
    """Sets the tag.
12921

12922
    """
12923
    try:
12924
      for tag in self.op.tags:
12925
        self.target.AddTag(tag)
12926
    except errors.TagError, err:
12927
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
12928
    self.cfg.Update(self.target, feedback_fn)
12929

    
12930

    
12931
class LUTagsDel(TagsLU):
12932
  """Delete a list of tags from a given object.
12933

12934
  """
12935
  REQ_BGL = False
12936

    
12937
  def CheckPrereq(self):
12938
    """Check prerequisites.
12939

12940
    This checks that we have the given tag.
12941

12942
    """
12943
    TagsLU.CheckPrereq(self)
12944
    for tag in self.op.tags:
12945
      objects.TaggableObject.ValidateTag(tag)
12946
    del_tags = frozenset(self.op.tags)
12947
    cur_tags = self.target.GetTags()
12948

    
12949
    diff_tags = del_tags - cur_tags
12950
    if diff_tags:
12951
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
12952
      raise errors.OpPrereqError("Tag(s) %s not found" %
12953
                                 (utils.CommaJoin(diff_names), ),
12954
                                 errors.ECODE_NOENT)
12955

    
12956
  def Exec(self, feedback_fn):
12957
    """Remove the tag from the object.
12958

12959
    """
12960
    for tag in self.op.tags:
12961
      self.target.RemoveTag(tag)
12962
    self.cfg.Update(self.target, feedback_fn)
12963

    
12964

    
12965
class LUTestDelay(NoHooksLU):
12966
  """Sleep for a specified amount of time.
12967

12968
  This LU sleeps on the master and/or nodes for a specified amount of
12969
  time.
12970

12971
  """
12972
  REQ_BGL = False
12973

    
12974
  def ExpandNames(self):
12975
    """Expand names and set required locks.
12976

12977
    This expands the node list, if any.
12978

12979
    """
12980
    self.needed_locks = {}
12981
    if self.op.on_nodes:
12982
      # _GetWantedNodes can be used here, but is not always appropriate to use
12983
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12984
      # more information.
12985
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12986
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12987

    
12988
  def _TestDelay(self):
12989
    """Do the actual sleep.
12990

12991
    """
12992
    if self.op.on_master:
12993
      if not utils.TestDelay(self.op.duration):
12994
        raise errors.OpExecError("Error during master delay test")
12995
    if self.op.on_nodes:
12996
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12997
      for node, node_result in result.items():
12998
        node_result.Raise("Failure during rpc call to node %s" % node)
12999

    
13000
  def Exec(self, feedback_fn):
13001
    """Execute the test delay opcode, with the wanted repetitions.
13002

13003
    """
13004
    if self.op.repeat == 0:
13005
      self._TestDelay()
13006
    else:
13007
      top_value = self.op.repeat - 1
13008
      for i in range(self.op.repeat):
13009
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13010
        self._TestDelay()
13011

    
13012

    
13013
class LUTestJqueue(NoHooksLU):
13014
  """Utility LU to test some aspects of the job queue.
13015

13016
  """
13017
  REQ_BGL = False
13018

    
13019
  # Must be lower than default timeout for WaitForJobChange to see whether it
13020
  # notices changed jobs
13021
  _CLIENT_CONNECT_TIMEOUT = 20.0
13022
  _CLIENT_CONFIRM_TIMEOUT = 60.0
13023

    
13024
  @classmethod
13025
  def _NotifyUsingSocket(cls, cb, errcls):
13026
    """Opens a Unix socket and waits for another program to connect.
13027

13028
    @type cb: callable
13029
    @param cb: Callback to send socket name to client
13030
    @type errcls: class
13031
    @param errcls: Exception class to use for errors
13032

13033
    """
13034
    # Using a temporary directory as there's no easy way to create temporary
13035
    # sockets without writing a custom loop around tempfile.mktemp and
13036
    # socket.bind
13037
    tmpdir = tempfile.mkdtemp()
13038
    try:
13039
      tmpsock = utils.PathJoin(tmpdir, "sock")
13040

    
13041
      logging.debug("Creating temporary socket at %s", tmpsock)
13042
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13043
      try:
13044
        sock.bind(tmpsock)
13045
        sock.listen(1)
13046

    
13047
        # Send details to client
13048
        cb(tmpsock)
13049

    
13050
        # Wait for client to connect before continuing
13051
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13052
        try:
13053
          (conn, _) = sock.accept()
13054
        except socket.error, err:
13055
          raise errcls("Client didn't connect in time (%s)" % err)
13056
      finally:
13057
        sock.close()
13058
    finally:
13059
      # Remove as soon as client is connected
13060
      shutil.rmtree(tmpdir)
13061

    
13062
    # Wait for client to close
13063
    try:
13064
      try:
13065
        # pylint: disable=E1101
13066
        # Instance of '_socketobject' has no ... member
13067
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13068
        conn.recv(1)
13069
      except socket.error, err:
13070
        raise errcls("Client failed to confirm notification (%s)" % err)
13071
    finally:
13072
      conn.close()
13073

    
13074
  def _SendNotification(self, test, arg, sockname):
13075
    """Sends a notification to the client.
13076

13077
    @type test: string
13078
    @param test: Test name
13079
    @param arg: Test argument (depends on test)
13080
    @type sockname: string
13081
    @param sockname: Socket path
13082

13083
    """
13084
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13085

    
13086
  def _Notify(self, prereq, test, arg):
13087
    """Notifies the client of a test.
13088

13089
    @type prereq: bool
13090
    @param prereq: Whether this is a prereq-phase test
13091
    @type test: string
13092
    @param test: Test name
13093
    @param arg: Test argument (depends on test)
13094

13095
    """
13096
    if prereq:
13097
      errcls = errors.OpPrereqError
13098
    else:
13099
      errcls = errors.OpExecError
13100

    
13101
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13102
                                                  test, arg),
13103
                                   errcls)
13104

    
13105
  def CheckArguments(self):
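    # Count invocations so that ExpandNames and Exec can verify the LU
    # life-cycle methods were called in the expected order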
13106
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13107
    self.expandnames_calls = 0
13108

    
13109
  def ExpandNames(self):
13110
    checkargs_calls = getattr(self, "checkargs_calls", 0)
13111
    if checkargs_calls < 1:
13112
      raise errors.ProgrammerError("CheckArguments was not called")
13113

    
13114
    self.expandnames_calls += 1
13115

    
13116
    if self.op.notify_waitlock:
13117
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
13118

    
13119
    self.LogInfo("Expanding names")
13120

    
13121
    # Get lock on master node (just to get a lock, not for a particular reason)
13122
    self.needed_locks = {
13123
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13124
      }
13125

    
13126
  def Exec(self, feedback_fn):
13127
    if self.expandnames_calls < 1:
13128
      raise errors.ProgrammerError("ExpandNames was not called")
13129

    
13130
    if self.op.notify_exec:
13131
      self._Notify(False, constants.JQT_EXEC, None)
13132

    
13133
    self.LogInfo("Executing")
13134

    
13135
    if self.op.log_messages:
13136
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13137
      for idx, msg in enumerate(self.op.log_messages):
13138
        self.LogInfo("Sending log message %s", idx + 1)
13139
        feedback_fn(constants.JQT_MSGPREFIX + msg)
13140
        # Report how many test messages have been sent
13141
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13142

    
13143
    if self.op.fail:
13144
      raise errors.OpExecError("Opcode failure was requested")
13145

    
13146
    return True
13147

    
13148

    
13149
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has the following sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
13162
  # pylint: disable=R0902
13163
  # lots of instance attributes
13164

    
13165
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
13166
    self.cfg = cfg
13167
    self.rpc = rpc_runner
13168
    # init buffer variables
13169
    self.in_text = self.out_text = self.in_data = self.out_data = None
13170
    # init all input fields so that pylint is happy
13171
    self.mode = mode
13172
    self.memory = self.disks = self.disk_template = None
13173
    self.os = self.tags = self.nics = self.vcpus = None
13174
    self.hypervisor = None
13175
    self.relocate_from = None
13176
    self.name = None
13177
    self.instances = None
13178
    self.evac_mode = None
13179
    self.target_groups = []
13180
    # computed fields
13181
    self.required_nodes = None
13182
    # init result fields
13183
    self.success = self.info = self.result = None
13184

    
13185
    try:
13186
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13187
    except KeyError:
13188
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13189
                                   " IAllocator" % self.mode)
13190

    
13191
    keyset = [n for (n, _) in keydata]
13192

    
13193
    for key in kwargs:
13194
      if key not in keyset:
13195
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
13196
                                     " IAllocator" % key)
13197
      setattr(self, key, kwargs[key])
13198

    
13199
    for key in keyset:
13200
      if key not in kwargs:
13201
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13202
                                     " IAllocator" % key)
13203
    self._BuildInputData(compat.partial(fn, self), keydata)
13204

    
13205
  def _ComputeClusterData(self):
13206
    """Compute the generic allocator input data.
13207

13208
    This is the data that is independent of the actual operation.
13209

13210
    """
13211
    cfg = self.cfg
13212
    cluster_info = cfg.GetClusterInfo()
13213
    # cluster data
13214
    data = {
13215
      "version": constants.IALLOCATOR_VERSION,
13216
      "cluster_name": cfg.GetClusterName(),
13217
      "cluster_tags": list(cluster_info.GetTags()),
13218
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13219
      # we don't have job IDs
13220
      }
13221
    ninfo = cfg.GetAllNodesInfo()
13222
    iinfo = cfg.GetAllInstancesInfo().values()
13223
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13224

    
13225
    # node data
13226
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
13227

    
13228
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13229
      hypervisor_name = self.hypervisor
13230
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13231
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13232
    else:
13233
      hypervisor_name = cluster_info.enabled_hypervisors[0]
13234

    
13235
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13236
                                        hypervisor_name)
13237
    node_iinfo = \
13238
      self.rpc.call_all_instances_info(node_list,
13239
                                       cluster_info.enabled_hypervisors)
13240

    
13241
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13242

    
13243
    config_ndata = self._ComputeBasicNodeData(ninfo)
13244
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13245
                                                 i_list, config_ndata)
13246
    assert len(data["nodes"]) == len(ninfo), \
13247
        "Incomplete node data computed"
13248

    
13249
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13250

    
13251
    self.in_data = data
13252

    
13253
  @staticmethod
13254
  def _ComputeNodeGroupData(cfg):
13255
    """Compute node groups data.
13256

13257
    """
13258
    ng = dict((guuid, {
13259
      "name": gdata.name,
13260
      "alloc_policy": gdata.alloc_policy,
13261
      })
13262
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13263

    
13264
    return ng
13265

    
13266
  @staticmethod
13267
  def _ComputeBasicNodeData(node_cfg):
13268
    """Compute global node data.
13269

13270
    @rtype: dict
13271
    @returns: a dict of name: (node dict, node config)
13272

13273
    """
13274
    # fill in static (config-based) values
13275
    node_results = dict((ninfo.name, {
13276
      "tags": list(ninfo.GetTags()),
13277
      "primary_ip": ninfo.primary_ip,
13278
      "secondary_ip": ninfo.secondary_ip,
13279
      "offline": ninfo.offline,
13280
      "drained": ninfo.drained,
13281
      "master_candidate": ninfo.master_candidate,
13282
      "group": ninfo.group,
13283
      "master_capable": ninfo.master_capable,
13284
      "vm_capable": ninfo.vm_capable,
13285
      })
13286
      for ninfo in node_cfg.values())
13287

    
13288
    return node_results
13289

    
13290
  @staticmethod
13291
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13292
                              node_results):
13293
    """Compute global node data.
13294

13295
    @param node_results: the basic node structures as filled from the config
13296

13297
    """
13298
    # make a copy of the current dict
13299
    node_results = dict(node_results)
13300
    for nname, nresult in node_data.items():
13301
      assert nname in node_results, "Missing basic data for node %s" % nname
13302
      ninfo = node_cfg[nname]
13303

    
13304
      if not (ninfo.offline or ninfo.drained):
13305
        nresult.Raise("Can't get data for node %s" % nname)
13306
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13307
                                nname)
13308
        remote_info = nresult.payload
13309

    
13310
        for attr in ["memory_total", "memory_free", "memory_dom0",
13311
                     "vg_size", "vg_free", "cpu_total"]:
13312
          if attr not in remote_info:
13313
            raise errors.OpExecError("Node '%s' didn't return attribute"
13314
                                     " '%s'" % (nname, attr))
13315
          if not isinstance(remote_info[attr], int):
13316
            raise errors.OpExecError("Node '%s' returned invalid value"
13317
                                     " for '%s': %s" %
13318
                                     (nname, attr, remote_info[attr]))
13319
        # compute memory used by primary instances
13320
        i_p_mem = i_p_up_mem = 0
13321
        for iinfo, beinfo in i_list:
13322
          if iinfo.primary_node == nname:
13323
            i_p_mem += beinfo[constants.BE_MEMORY]
13324
            if iinfo.name not in node_iinfo[nname].payload:
13325
              i_used_mem = 0
13326
            else:
13327
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13328
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13329
            remote_info["memory_free"] -= max(0, i_mem_diff)
13330

    
13331
            if iinfo.admin_up:
13332
              i_p_up_mem += beinfo[constants.BE_MEMORY]
13333

    
13334
        # compute memory used by instances
13335
        pnr_dyn = {
13336
          "total_memory": remote_info["memory_total"],
13337
          "reserved_memory": remote_info["memory_dom0"],
13338
          "free_memory": remote_info["memory_free"],
13339
          "total_disk": remote_info["vg_size"],
13340
          "free_disk": remote_info["vg_free"],
13341
          "total_cpus": remote_info["cpu_total"],
13342
          "i_pri_memory": i_p_mem,
13343
          "i_pri_up_memory": i_p_up_mem,
13344
          }
13345
        pnr_dyn.update(node_results[nname])
13346
        node_results[nname] = pnr_dyn
13347

    
13348
    return node_results
13349

    
13350
  @staticmethod
13351
  def _ComputeInstanceData(cluster_info, i_list):
13352
    """Compute global instance data.
13353

13354
    """
13355
    instance_data = {}
13356
    for iinfo, beinfo in i_list:
13357
      nic_data = []
13358
      for nic in iinfo.nics:
13359
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13360
        nic_dict = {
13361
          "mac": nic.mac,
13362
          "ip": nic.ip,
13363
          "mode": filled_params[constants.NIC_MODE],
13364
          "link": filled_params[constants.NIC_LINK],
13365
          }
13366
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13367
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13368
        nic_data.append(nic_dict)
13369
      pir = {
13370
        "tags": list(iinfo.GetTags()),
13371
        "admin_up": iinfo.admin_up,
13372
        "vcpus": beinfo[constants.BE_VCPUS],
13373
        "memory": beinfo[constants.BE_MEMORY],
13374
        "os": iinfo.os,
13375
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13376
        "nics": nic_data,
13377
        "disks": [{constants.IDISK_SIZE: dsk.size,
13378
                   constants.IDISK_MODE: dsk.mode}
13379
                  for dsk in iinfo.disks],
13380
        "disk_template": iinfo.disk_template,
13381
        "hypervisor": iinfo.hypervisor,
13382
        }
13383
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13384
                                                 pir["disks"])
13385
      instance_data[iinfo.name] = pir
13386

    
13387
    return instance_data
13388

    
13389
  def _AddNewInstance(self):
13390
    """Add new instance data to allocator structure.
13391

13392
    This in combination with _ComputeClusterData will create the
13393
    correct structure needed as input for the allocator.
13394

13395
    The checks for the completeness of the opcode must have already been
13396
    done.
13397

13398
    """
13399
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13400

    
13401
    if self.disk_template in constants.DTS_INT_MIRROR:
13402
      self.required_nodes = 2
13403
    else:
13404
      self.required_nodes = 1
13405

    
13406
    request = {
13407
      "name": self.name,
13408
      "disk_template": self.disk_template,
13409
      "tags": self.tags,
13410
      "os": self.os,
13411
      "vcpus": self.vcpus,
13412
      "memory": self.memory,
13413
      "disks": self.disks,
13414
      "disk_space_total": disk_space,
13415
      "nics": self.nics,
13416
      "required_nodes": self.required_nodes,
13417
      "hypervisor": self.hypervisor,
13418
      }
13419

    
13420
    return request
13421

    
13422
  def _AddRelocateInstance(self):
13423
    """Add relocate instance data to allocator structure.
13424

13425
    This in combination with _ComputeClusterData will create the
13426
    correct structure needed as input for the allocator.
13427

13428
    The checks for the completeness of the opcode must have already been
13429
    done.
13430

13431
    """
13432
    instance = self.cfg.GetInstanceInfo(self.name)
13433
    if instance is None:
13434
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13435
                                   " IAllocator" % self.name)
13436

    
13437
    if instance.disk_template not in constants.DTS_MIRRORED:
13438
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13439
                                 errors.ECODE_INVAL)
13440

    
13441
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13442
        len(instance.secondary_nodes) != 1:
13443
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13444
                                 errors.ECODE_STATE)
13445

    
13446
    self.required_nodes = 1
13447
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13448
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13449

    
13450
    request = {
13451
      "name": self.name,
13452
      "disk_space_total": disk_space,
13453
      "required_nodes": self.required_nodes,
13454
      "relocate_from": self.relocate_from,
13455
      }
13456
    return request
13457

    
13458
  def _AddNodeEvacuate(self):
13459
    """Get data for node-evacuate requests.
13460

13461
    """
13462
    return {
13463
      "instances": self.instances,
13464
      "evac_mode": self.evac_mode,
13465
      }
13466

    
13467
  def _AddChangeGroup(self):
13468
    """Get data for node-evacuate requests.
13469

13470
    """
13471
    return {
13472
      "instances": self.instances,
13473
      "target_groups": self.target_groups,
13474
      }
13475

    
13476
  def _BuildInputData(self, fn, keydata):
13477
    """Build input data structures.
13478

13479
    """
13480
    self._ComputeClusterData()
13481

    
13482
    request = fn()
13483
    request["type"] = self.mode
13484
    for keyname, keytype in keydata:
13485
      if keyname not in request:
13486
        raise errors.ProgrammerError("Request parameter %s is missing" %
13487
                                     keyname)
13488
      val = request[keyname]
13489
      if not keytype(val):
13490
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13491
                                     " validation, value %s, expected"
13492
                                     " type %s" % (keyname, val, keytype))
13493
    self.in_data["request"] = request
13494

    
13495
    self.in_text = serializer.Dump(self.in_data)
13496

    
13497
  _STRING_LIST = ht.TListOf(ht.TString)
13498
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13499
     # pylint: disable=E1101
13500
     # Class '...' has no 'OP_ID' member
13501
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13502
                          opcodes.OpInstanceMigrate.OP_ID,
13503
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13504
     })))
13505

    
13506
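  # A node evacuation (or group change) result consists of three lists: moved
  # instances, (instance, failure message) pairs and the jobs to submit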
  _NEVAC_MOVED = \
13507
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13508
                       ht.TItems([ht.TNonEmptyString,
13509
                                  ht.TNonEmptyString,
13510
                                  ht.TListOf(ht.TNonEmptyString),
13511
                                 ])))
13512
  _NEVAC_FAILED = \
13513
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13514
                       ht.TItems([ht.TNonEmptyString,
13515
                                  ht.TMaybeString,
13516
                                 ])))
13517
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13518
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13519

    
13520
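  # Maps each iallocator mode to a (request-building method, list of
  # (parameter name, validator) pairs, result validator) tuple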
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

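  # The serialized input is handed to the iallocator runner on the master
  # node via RPC; call_fn can be overridden (e.g. by tests) to bypass RPC.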
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

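    # For relocations, the nodes proposed by the allocator must stay within
    # the node groups the instance already spans, otherwise the result is
    # rejected.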
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


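# LUTestAllocator builds an IAllocator request for the requested mode and,
# depending on the opcode's direction, either returns the generated input
# text or runs the named allocator script and returns its raw, unvalidated
# output.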
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

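    # IALLOCATOR_DIR_OUT actually runs the named allocator script and thus
    # needs its name; IALLOCATOR_DIR_IN only builds and returns the input
    # text (see Exec below).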
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


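# Helper mapping a QR_* resource name to its implementation class from
# _QUERY_IMPL above; unknown resource names are reported as OpPrereqError.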
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)