1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62
from ganeti import rpc
63

    
64
import ganeti.masterd.instance # pylint: disable=W0611
65

    
66

    
67
#: Size of DRBD meta block device
68
DRBD_META_SIZE = 128
69

    
70

    
71
class ResultWithJobs:
72
  """Data container for LU results with jobs.
73

74
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
75
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
76
  contained in the C{jobs} attribute and include the job IDs in the opcode
77
  result.
78

79
  """
80
  def __init__(self, jobs, **kwargs):
81
    """Initializes this class.
82

83
    Additional return values can be specified as keyword arguments.
84

85
    @type jobs: list of lists of L{opcodes.OpCode}
86
    @param jobs: A list of lists of opcode objects
87

88
    """
89
    self.jobs = jobs
90
    self.other = kwargs
91
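# Illustrative sketch (not part of the original module): how an LU's Exec
# method might hand follow-up jobs back to the master daemon by returning a
# ResultWithJobs instance. The opcode and the "submitted_by" keyword used
# here are examples only; any list of lists of opcodes would do.
#
#   def Exec(self, feedback_fn):
#     jobs = [
#       [opcodes.OpClusterVerifyConfig()],   # first job, a single opcode
#       ]
#     # extra keyword arguments end up in the "other" attribute
#     return ResultWithJobs(jobs, submitted_by=self.op.OP_ID)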

    
92

    
93
class LogicalUnit(object):
94
  """Logical Unit base class.
95

96
  Subclasses must follow these rules:
97
    - implement ExpandNames
98
    - implement CheckPrereq (except when tasklets are used)
99
    - implement Exec (except when tasklets are used)
100
    - implement BuildHooksEnv
101
    - implement BuildHooksNodes
102
    - redefine HPATH and HTYPE
103
    - optionally redefine their run requirements:
104
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
105

106
  Note that all commands require root permissions.
107

108
  @ivar dry_run_result: the value (if any) that will be returned to the caller
109
      in dry-run mode (signalled by opcode dry_run parameter)
110

111
  """
112
  HPATH = None
113
  HTYPE = None
114
  REQ_BGL = True
115

    
116
  def __init__(self, processor, op, context, rpc_runner):
117
    """Constructor for LogicalUnit.
118

119
    This needs to be overridden in derived classes in order to check op
120
    validity.
121

122
    """
123
    self.proc = processor
124
    self.op = op
125
    self.cfg = context.cfg
126
    self.glm = context.glm
127
    # readability alias
128
    self.owned_locks = context.glm.list_owned
129
    self.context = context
130
    self.rpc = rpc_runner
131
    # Dicts used to declare locking needs to mcpu
132
    self.needed_locks = None
133
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
134
    self.add_locks = {}
135
    self.remove_locks = {}
136
    # Used to force good behavior when calling helper functions
137
    self.recalculate_locks = {}
138
    # logging
139
    self.Log = processor.Log # pylint: disable=C0103
140
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
141
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
142
    self.LogStep = processor.LogStep # pylint: disable=C0103
143
    # support for dry-run
144
    self.dry_run_result = None
145
    # support for generic debug attribute
146
    if (not hasattr(self.op, "debug_level") or
147
        not isinstance(self.op.debug_level, int)):
148
      self.op.debug_level = 0
149

    
150
    # Tasklets
151
    self.tasklets = None
152

    
153
    # Validate opcode parameters and set defaults
154
    self.op.Validate(True)
155

    
156
    self.CheckArguments()
157

    
158
  def CheckArguments(self):
159
    """Check syntactic validity for the opcode arguments.
160

161
    This method is for doing a simple syntactic check and ensuring the
162
    validity of opcode parameters, without any cluster-related
163
    checks. While the same can be accomplished in ExpandNames and/or
164
    CheckPrereq, doing these separately is better because:
165

166
      - ExpandNames is left as purely a lock-related function
167
      - CheckPrereq is run after we have acquired locks (and possibly
168
        waited for them)
169

170
    The function is allowed to change the self.op attribute so that
171
    later methods no longer need to worry about missing parameters.
172

173
    """
174
    pass
175

    
176
  def ExpandNames(self):
177
    """Expand names for this LU.
178

179
    This method is called before starting to execute the opcode, and it should
180
    update all the parameters of the opcode to their canonical form (e.g. a
181
    short node name must be fully expanded after this method has successfully
182
    completed). This way locking, hooks, logging, etc. can work correctly.
183

184
    LUs which implement this method must also populate the self.needed_locks
185
    member, as a dict with lock levels as keys, and a list of needed lock names
186
    as values. Rules:
187

188
      - use an empty dict if you don't need any lock
189
      - if you don't need any lock at a particular level omit that level
190
      - don't put anything for the BGL level
191
      - if you want all locks at a level use locking.ALL_SET as a value
192

193
    If you need to share locks (rather than acquire them exclusively) at one
194
    level you can modify self.share_locks, setting a true value (usually 1) for
195
    that level. By default locks are not shared.
196

197
    This function can also define a list of tasklets, which then will be
198
    executed in order instead of the usual LU-level CheckPrereq and Exec
199
    functions, if those are not defined by the LU.
200

201
    Examples::
202

203
      # Acquire all nodes and one instance
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: locking.ALL_SET,
206
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
207
      }
208
      # Acquire just two nodes
209
      self.needed_locks = {
210
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
211
      }
212
      # Acquire no locks
213
      self.needed_locks = {} # No, you can't leave it to the default value None
214

215
    """
216
    # The implementation of this method is mandatory only if the new LU is
217
    # concurrent, so that old LUs don't need to be changed all at the same
218
    # time.
219
    if self.REQ_BGL:
220
      self.needed_locks = {} # Exclusive LUs don't need locks.
221
    else:
222
      raise NotImplementedError
223
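  # Illustrative sketch (not part of the original module): an ExpandNames
  # implementation for a hypothetical concurrent LU (one that also sets
  # REQ_BGL = False) which wants all node locks in shared mode, as described
  # in the docstring above.
  #
  #   def ExpandNames(self):
  #     self.needed_locks = {
  #       locking.LEVEL_NODE: locking.ALL_SET,
  #       }
  #     # acquire the node locks shared instead of exclusively
  #     self.share_locks[locking.LEVEL_NODE] = 1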

    
224
  def DeclareLocks(self, level):
225
    """Declare LU locking needs for a level
226

227
    While most LUs can just declare their locking needs at ExpandNames time,
228
    sometimes there's the need to calculate some locks after having acquired
229
    the ones before. This function is called just before acquiring locks at a
230
    particular level, but after acquiring the ones at lower levels, and permits
231
    such calculations. It can be used to modify self.needed_locks, and by
232
    default it does nothing.
233

234
    This function is only called if you have something already set in
235
    self.needed_locks for the level.
236

237
    @param level: Locking level which is going to be locked
238
    @type level: member of ganeti.locking.LEVELS
239

240
    """
241

    
242
  def CheckPrereq(self):
243
    """Check prerequisites for this LU.
244

245
    This method should check that the prerequisites for the execution
246
    of this LU are fulfilled. It can do internode communication, but
247
    it should be idempotent - no cluster or system changes are
248
    allowed.
249

250
    The method should raise errors.OpPrereqError in case something is
251
    not fulfilled. Its return value is ignored.
252

253
    This method should also update all the parameters of the opcode to
254
    their canonical form if it hasn't been done by ExpandNames before.
255

256
    """
257
    if self.tasklets is not None:
258
      for (idx, tl) in enumerate(self.tasklets):
259
        logging.debug("Checking prerequisites for tasklet %s/%s",
260
                      idx + 1, len(self.tasklets))
261
        tl.CheckPrereq()
262
    else:
263
      pass
264

    
265
  def Exec(self, feedback_fn):
266
    """Execute the LU.
267

268
    This method should implement the actual work. It should raise
269
    errors.OpExecError for failures that are somewhat dealt with in
270
    code, or expected.
271

272
    """
273
    if self.tasklets is not None:
274
      for (idx, tl) in enumerate(self.tasklets):
275
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
276
        tl.Exec(feedback_fn)
277
    else:
278
      raise NotImplementedError
279

    
280
  def BuildHooksEnv(self):
281
    """Build hooks environment for this LU.
282

283
    @rtype: dict
284
    @return: Dictionary containing the environment that will be used for
285
      running the hooks for this LU. The keys of the dict must not be prefixed
286
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
287
      will extend the environment with additional variables. If no environment
288
      should be defined, an empty dictionary should be returned (not C{None}).
289
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
290
      will not be called.
291

292
    """
293
    raise NotImplementedError
294

    
295
  def BuildHooksNodes(self):
296
    """Build list of nodes to run LU's hooks.
297

298
    @rtype: tuple; (list, list)
299
    @return: Tuple containing a list of node names on which the hook
300
      should run before the execution and a list of node names on which the
301
      hook should run after the execution. No nodes should be returned as an
302
      empty list (and not None).
303
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
304
      will not be called.
305

306
    """
307
    raise NotImplementedError
308

    
309
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
310
    """Notify the LU about the results of its hooks.
311

312
    This method is called every time a hooks phase is executed, and notifies
313
    the Logical Unit about the hooks' result. The LU can then use it to alter
314
    its result based on the hooks.  By default the method does nothing and the
315
    previous result is passed back unchanged but any LU can define it if it
316
    wants to use the local cluster hook-scripts somehow.
317

318
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
319
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
320
    @param hook_results: the results of the multi-node hooks rpc call
321
    @param feedback_fn: function used to send feedback back to the caller
322
    @param lu_result: the previous Exec result this LU had, or None
323
        in the PRE phase
324
    @return: the new Exec result, based on the previous result
325
        and hook results
326

327
    """
328
    # API must be kept, thus we ignore the unused-argument and
329
    # could-be-a-function warnings
330
    # pylint: disable=W0613,R0201
331
    return lu_result
332
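  # Illustrative sketch (not part of the original module): an LU that wants
  # to react to its post-phase hooks could override HooksCallBack along these
  # lines, e.g. to report how many nodes ran the hook while leaving the Exec
  # result unchanged.
  #
  #   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
  #     if phase == constants.HOOKS_PHASE_POST:
  #       feedback_fn("Post hooks ran on %d node(s)" % len(hook_results))
  #     return lu_result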

    
333
  def _ExpandAndLockInstance(self):
334
    """Helper function to expand and lock an instance.
335

336
    Many LUs that work on an instance take its name in self.op.instance_name
337
    and need to expand it and then declare the expanded name for locking. This
338
    function does it, and then updates self.op.instance_name to the expanded
339
    name. It also initializes needed_locks as a dict, if this hasn't been done
340
    before.
341

342
    """
343
    if self.needed_locks is None:
344
      self.needed_locks = {}
345
    else:
346
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
347
        "_ExpandAndLockInstance called with instance-level locks set"
348
    self.op.instance_name = _ExpandInstanceName(self.cfg,
349
                                                self.op.instance_name)
350
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
351

    
352
  def _LockInstancesNodes(self, primary_only=False):
353
    """Helper function to declare instances' nodes for locking.
354

355
    This function should be called after locking one or more instances to lock
356
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
357
    with all primary or secondary nodes for instances already locked and
358
    present in self.needed_locks[locking.LEVEL_INSTANCE].
359

360
    It should be called from DeclareLocks, and for safety only works if
361
    self.recalculate_locks[locking.LEVEL_NODE] is set.
362

363
    In the future it may grow parameters to just lock some instance's nodes, or
364
    to just lock primary or secondary nodes, if needed.
365

366
    It should be called in DeclareLocks in a way similar to::
367

368
      if level == locking.LEVEL_NODE:
369
        self._LockInstancesNodes()
370

371
    @type primary_only: boolean
372
    @param primary_only: only lock primary nodes of locked instances
373

374
    """
375
    assert locking.LEVEL_NODE in self.recalculate_locks, \
376
      "_LockInstancesNodes helper function called with no nodes to recalculate"
377

    
378
    # TODO: check if we've really been called with the instance locks held
379

    
380
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
381
    # future we might want to have different behaviors depending on the value
382
    # of self.recalculate_locks[locking.LEVEL_NODE]
383
    wanted_nodes = []
384
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
385
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
386
      wanted_nodes.append(instance.primary_node)
387
      if not primary_only:
388
        wanted_nodes.extend(instance.secondary_nodes)
389

    
390
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
391
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
392
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
393
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
394

    
395
    del self.recalculate_locks[locking.LEVEL_NODE]
396
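  # Illustrative sketch (not part of the original module): the usual pattern
  # for an LU that locks an instance and then its nodes, combining
  # _ExpandAndLockInstance, recalculate_locks and _LockInstancesNodes.
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()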

    
397

    
398
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
399
  """Simple LU which runs no hooks.
400

401
  This LU is intended as a parent for other LogicalUnits which will
402
  run no hooks, in order to reduce duplicate code.
403

404
  """
405
  HPATH = None
406
  HTYPE = None
407

    
408
  def BuildHooksEnv(self):
409
    """Empty BuildHooksEnv for NoHooksLu.
410

411
    This just raises an error.
412

413
    """
414
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
415

    
416
  def BuildHooksNodes(self):
417
    """Empty BuildHooksNodes for NoHooksLU.
418

419
    """
420
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
421

    
422

    
423
class Tasklet:
424
  """Tasklet base class.
425

426
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
427
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
428
  tasklets know nothing about locks.
429

430
  Subclasses must follow these rules:
431
    - Implement CheckPrereq
432
    - Implement Exec
433

434
  """
435
  def __init__(self, lu):
436
    self.lu = lu
437

    
438
    # Shortcuts
439
    self.cfg = lu.cfg
440
    self.rpc = lu.rpc
441

    
442
  def CheckPrereq(self):
443
    """Check prerequisites for this tasklets.
444

445
    This method should check whether the prerequisites for the execution of
446
    this tasklet are fulfilled. It can do internode communication, but it
447
    should be idempotent - no cluster or system changes are allowed.
448

449
    The method should raise errors.OpPrereqError in case something is not
450
    fulfilled. Its return value is ignored.
451

452
    This method should also update all parameters to their canonical form if it
453
    hasn't been done before.
454

455
    """
456
    pass
457

    
458
  def Exec(self, feedback_fn):
459
    """Execute the tasklet.
460

461
    This method should implement the actual work. It should raise
462
    errors.OpExecError for failures that are somewhat dealt with in code, or
463
    expected.
464

465
    """
466
    raise NotImplementedError
467
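# Illustrative sketch (not part of the original module): how an LU could be
# built out of tasklets. The tasklet class name is hypothetical; the wiring
# via self.tasklets in the owning LU's ExpandNames is the important part.
#
#   class _ExampleTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Working on %s" % self.instance_name)
#
#   # inside the owning LU's ExpandNames:
#   #   self.tasklets = [_ExampleTasklet(self, name) for name in names]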

    
468

    
469
class _QueryBase:
470
  """Base for query utility classes.
471

472
  """
473
  #: Attribute holding field definitions
474
  FIELDS = None
475

    
476
  def __init__(self, qfilter, fields, use_locking):
477
    """Initializes this class.
478

479
    """
480
    self.use_locking = use_locking
481

    
482
    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
483
                             namefield="name")
484
    self.requested_data = self.query.RequestedData()
485
    self.names = self.query.RequestedNames()
486

    
487
    # Sort only if no names were requested
488
    self.sort_by_name = not self.names
489

    
490
    self.do_locking = None
491
    self.wanted = None
492

    
493
  def _GetNames(self, lu, all_names, lock_level):
494
    """Helper function to determine names asked for in the query.
495

496
    """
497
    if self.do_locking:
498
      names = lu.owned_locks(lock_level)
499
    else:
500
      names = all_names
501

    
502
    if self.wanted == locking.ALL_SET:
503
      assert not self.names
504
      # caller didn't specify names, so ordering is not important
505
      return utils.NiceSort(names)
506

    
507
    # caller specified names and we must keep the same order
508
    assert self.names
509
    assert not self.do_locking or lu.glm.is_owned(lock_level)
510

    
511
    missing = set(self.wanted).difference(names)
512
    if missing:
513
      raise errors.OpExecError("Some items were removed before retrieving"
514
                               " their data: %s" % missing)
515

    
516
    # Return expanded names
517
    return self.wanted
518

    
519
  def ExpandNames(self, lu):
520
    """Expand names for this query.
521

522
    See L{LogicalUnit.ExpandNames}.
523

524
    """
525
    raise NotImplementedError()
526

    
527
  def DeclareLocks(self, lu, level):
528
    """Declare locks for this query.
529

530
    See L{LogicalUnit.DeclareLocks}.
531

532
    """
533
    raise NotImplementedError()
534

    
535
  def _GetQueryData(self, lu):
536
    """Collects all data for this query.
537

538
    @return: Query data object
539

540
    """
541
    raise NotImplementedError()
542

    
543
  def NewStyleQuery(self, lu):
544
    """Collect data and execute query.
545

546
    """
547
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
548
                                  sort_by_name=self.sort_by_name)
549

    
550
  def OldStyleQuery(self, lu):
551
    """Collect data and execute query.
552

553
    """
554
    return self.query.OldStyleQuery(self._GetQueryData(lu),
555
                                    sort_by_name=self.sort_by_name)
556

    
557

    
558
def _ShareAll():
559
  """Returns a dict declaring all lock levels shared.
560

561
  """
562
  return dict.fromkeys(locking.LEVELS, 1)
563
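# Usage note (illustrative, not part of the original module): LUs typically
# combine this with their lock declaration, e.g.
#
#   self.share_locks = _ShareAll()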

    
564

    
565
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
566
  """Checks if the owned node groups are still correct for an instance.
567

568
  @type cfg: L{config.ConfigWriter}
569
  @param cfg: The cluster configuration
570
  @type instance_name: string
571
  @param instance_name: Instance name
572
  @type owned_groups: set or frozenset
573
  @param owned_groups: List of currently owned node groups
574

575
  """
576
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
577

    
578
  if not owned_groups.issuperset(inst_groups):
579
    raise errors.OpPrereqError("Instance %s's node groups changed since"
580
                               " locks were acquired, current groups are"
581
                               " are '%s', owning groups '%s'; retry the"
582
                               " operation" %
583
                               (instance_name,
584
                                utils.CommaJoin(inst_groups),
585
                                utils.CommaJoin(owned_groups)),
586
                               errors.ECODE_STATE)
587

    
588
  return inst_groups
589

    
590

    
591
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
592
  """Checks if the instances in a node group are still correct.
593

594
  @type cfg: L{config.ConfigWriter}
595
  @param cfg: The cluster configuration
596
  @type group_uuid: string
597
  @param group_uuid: Node group UUID
598
  @type owned_instances: set or frozenset
599
  @param owned_instances: List of currently owned instances
600

601
  """
602
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
603
  if owned_instances != wanted_instances:
604
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
605
                               " locks were acquired, wanted '%s', have '%s';"
606
                               " retry the operation" %
607
                               (group_uuid,
608
                                utils.CommaJoin(wanted_instances),
609
                                utils.CommaJoin(owned_instances)),
610
                               errors.ECODE_STATE)
611

    
612
  return wanted_instances
613

    
614

    
615
def _SupportsOob(cfg, node):
616
  """Tells if node supports OOB.
617

618
  @type cfg: L{config.ConfigWriter}
619
  @param cfg: The cluster configuration
620
  @type node: L{objects.Node}
621
  @param node: The node
622
  @return: The OOB script if supported or an empty string otherwise
623

624
  """
625
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
626

    
627

    
628
def _GetWantedNodes(lu, nodes):
629
  """Returns list of checked and expanded node names.
630

631
  @type lu: L{LogicalUnit}
632
  @param lu: the logical unit on whose behalf we execute
633
  @type nodes: list
634
  @param nodes: list of node names or None for all nodes
635
  @rtype: list
636
  @return: the list of nodes, sorted
637
  @raise errors.ProgrammerError: if the nodes parameter is of the wrong type
638

639
  """
640
  if nodes:
641
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
642

    
643
  return utils.NiceSort(lu.cfg.GetNodeList())
644

    
645

    
646
def _GetWantedInstances(lu, instances):
647
  """Returns list of checked and expanded instance names.
648

649
  @type lu: L{LogicalUnit}
650
  @param lu: the logical unit on whose behalf we execute
651
  @type instances: list
652
  @param instances: list of instance names or None for all instances
653
  @rtype: list
654
  @return: the list of instances, sorted
655
  @raise errors.OpPrereqError: if the instances parameter is of the wrong type
656
  @raise errors.OpPrereqError: if any of the passed instances is not found
657

658
  """
659
  if instances:
660
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
661
  else:
662
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
663
  return wanted
664

    
665

    
666
def _GetUpdatedParams(old_params, update_dict,
667
                      use_default=True, use_none=False):
668
  """Return the new version of a parameter dictionary.
669

670
  @type old_params: dict
671
  @param old_params: old parameters
672
  @type update_dict: dict
673
  @param update_dict: dict containing new parameter values, or
674
      constants.VALUE_DEFAULT to reset the parameter to its default
675
      value
676
  @type use_default: boolean
677
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
678
      values as 'to be deleted' values
679
  @type use_none: boolean
680
  @param use_none: whether to recognise C{None} values as 'to be
681
      deleted' values
682
  @rtype: dict
683
  @return: the new parameter dictionary
684

685
  """
686
  params_copy = copy.deepcopy(old_params)
687
  for key, val in update_dict.iteritems():
688
    if ((use_default and val == constants.VALUE_DEFAULT) or
689
        (use_none and val is None)):
690
      try:
691
        del params_copy[key]
692
      except KeyError:
693
        pass
694
    else:
695
      params_copy[key] = val
696
  return params_copy
697
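# Illustrative example (not part of the original module) of how
# _GetUpdatedParams merges dictionaries; values equal to
# constants.VALUE_DEFAULT drop the key, everything else overrides or adds it:
#
#   old = {"vcpus": 2, "memory": 512}
#   upd = {"vcpus": constants.VALUE_DEFAULT, "memory": 1024, "auto_balance": 1}
#   _GetUpdatedParams(old, upd)
#   => {"memory": 1024, "auto_balance": 1}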

    
698

    
699
def _ReleaseLocks(lu, level, names=None, keep=None):
700
  """Releases locks owned by an LU.
701

702
  @type lu: L{LogicalUnit}
703
  @param level: Lock level
704
  @type names: list or None
705
  @param names: Names of locks to release
706
  @type keep: list or None
707
  @param keep: Names of locks to retain
708

709
  """
710
  assert not (keep is not None and names is not None), \
711
         "Only one of the 'names' and the 'keep' parameters can be given"
712

    
713
  if names is not None:
714
    should_release = names.__contains__
715
  elif keep:
716
    should_release = lambda name: name not in keep
717
  else:
718
    should_release = None
719

    
720
  if should_release:
721
    retain = []
722
    release = []
723

    
724
    # Determine which locks to release
725
    for name in lu.owned_locks(level):
726
      if should_release(name):
727
        release.append(name)
728
      else:
729
        retain.append(name)
730

    
731
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
732

    
733
    # Release just some locks
734
    lu.glm.release(level, names=release)
735

    
736
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
737
  else:
738
    # Release everything
739
    lu.glm.release(level)
740

    
741
    assert not lu.glm.is_owned(level), "No locks should be owned"
742
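# Illustrative sketch (not part of the original module): after an LU has
# narrowed down the nodes it really needs, it can drop the remaining node
# locks like this (the attribute names are examples only):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.instance.primary_node] +
#                      list(self.instance.secondary_nodes))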

    
743

    
744
def _MapInstanceDisksToNodes(instances):
745
  """Creates a map from (node, volume) to instance name.
746

747
  @type instances: list of L{objects.Instance}
748
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
749

750
  """
751
  return dict(((node, vol), inst.name)
752
              for inst in instances
753
              for (node, vols) in inst.MapLVsByNode().items()
754
              for vol in vols)
755
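# Illustrative example (not part of the original module) of the mapping
# produced above, for one DRBD instance with a single disk; the node and
# volume names are made-up examples:
#
#   {("node1.example.com", "xenvg/uuid.disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/uuid.disk0"): "inst1.example.com"}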

    
756

    
757
def _RunPostHook(lu, node_name):
758
  """Runs the post-hook for an opcode on a single node.
759

760
  """
761
  hm = lu.proc.BuildHooksManager(lu)
762
  try:
763
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
764
  except:
765
    # pylint: disable=W0702
766
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
767

    
768

    
769
def _CheckOutputFields(static, dynamic, selected):
770
  """Checks whether all selected fields are valid.
771

772
  @type static: L{utils.FieldSet}
773
  @param static: static fields set
774
  @type dynamic: L{utils.FieldSet}
775
  @param dynamic: dynamic fields set
776

777
  """
778
  f = utils.FieldSet()
779
  f.Extend(static)
780
  f.Extend(dynamic)
781

    
782
  delta = f.NonMatching(selected)
783
  if delta:
784
    raise errors.OpPrereqError("Unknown output fields selected: %s"
785
                               % ",".join(delta), errors.ECODE_INVAL)
786

    
787

    
788
def _CheckGlobalHvParams(params):
789
  """Validates that given hypervisor params are not global ones.
790

791
  This will ensure that instances don't get customised versions of
792
  global params.
793

794
  """
795
  used_globals = constants.HVC_GLOBALS.intersection(params)
796
  if used_globals:
797
    msg = ("The following hypervisor parameters are global and cannot"
798
           " be customized at instance level, please modify them at"
799
           " cluster level: %s" % utils.CommaJoin(used_globals))
800
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
801

    
802

    
803
def _CheckNodeOnline(lu, node, msg=None):
804
  """Ensure that a given node is online.
805

806
  @param lu: the LU on behalf of which we make the check
807
  @param node: the node to check
808
  @param msg: if passed, should be a message to replace the default one
809
  @raise errors.OpPrereqError: if the node is offline
810

811
  """
812
  if msg is None:
813
    msg = "Can't use offline node"
814
  if lu.cfg.GetNodeInfo(node).offline:
815
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
816

    
817

    
818
def _CheckNodeNotDrained(lu, node):
819
  """Ensure that a given node is not drained.
820

821
  @param lu: the LU on behalf of which we make the check
822
  @param node: the node to check
823
  @raise errors.OpPrereqError: if the node is drained
824

825
  """
826
  if lu.cfg.GetNodeInfo(node).drained:
827
    raise errors.OpPrereqError("Can't use drained node %s" % node,
828
                               errors.ECODE_STATE)
829

    
830

    
831
def _CheckNodeVmCapable(lu, node):
832
  """Ensure that a given node is vm capable.
833

834
  @param lu: the LU on behalf of which we make the check
835
  @param node: the node to check
836
  @raise errors.OpPrereqError: if the node is not vm capable
837

838
  """
839
  if not lu.cfg.GetNodeInfo(node).vm_capable:
840
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
841
                               errors.ECODE_STATE)
842

    
843

    
844
def _CheckNodeHasOS(lu, node, os_name, force_variant):
845
  """Ensure that a node supports a given OS.
846

847
  @param lu: the LU on behalf of which we make the check
848
  @param node: the node to check
849
  @param os_name: the OS to query about
850
  @param force_variant: whether to ignore variant errors
851
  @raise errors.OpPrereqError: if the node is not supporting the OS
852

853
  """
854
  result = lu.rpc.call_os_get(node, os_name)
855
  result.Raise("OS '%s' not in supported OS list for node %s" %
856
               (os_name, node),
857
               prereq=True, ecode=errors.ECODE_INVAL)
858
  if not force_variant:
859
    _CheckOSVariant(result.payload, os_name)
860

    
861

    
862
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
863
  """Ensure that a node has the given secondary ip.
864

865
  @type lu: L{LogicalUnit}
866
  @param lu: the LU on behalf of which we make the check
867
  @type node: string
868
  @param node: the node to check
869
  @type secondary_ip: string
870
  @param secondary_ip: the ip to check
871
  @type prereq: boolean
872
  @param prereq: whether to throw a prerequisite or an execute error
873
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
874
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
875

876
  """
877
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
878
  result.Raise("Failure checking secondary ip on node %s" % node,
879
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
880
  if not result.payload:
881
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
882
           " please fix and re-run this command" % secondary_ip)
883
    if prereq:
884
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
885
    else:
886
      raise errors.OpExecError(msg)
887

    
888

    
889
def _GetClusterDomainSecret():
890
  """Reads the cluster domain secret.
891

892
  """
893
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
894
                               strict=True)
895

    
896

    
897
def _CheckInstanceDown(lu, instance, reason):
898
  """Ensure that an instance is not running."""
899
  if instance.admin_up:
900
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
901
                               (instance.name, reason), errors.ECODE_STATE)
902

    
903
  pnode = instance.primary_node
904
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
905
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
906
              prereq=True, ecode=errors.ECODE_ENVIRON)
907

    
908
  if instance.name in ins_l.payload:
909
    raise errors.OpPrereqError("Instance %s is running, %s" %
910
                               (instance.name, reason), errors.ECODE_STATE)
911

    
912

    
913
def _ExpandItemName(fn, name, kind):
914
  """Expand an item name.
915

916
  @param fn: the function to use for expansion
917
  @param name: requested item name
918
  @param kind: text description ('Node' or 'Instance')
919
  @return: the resolved (full) name
920
  @raise errors.OpPrereqError: if the item is not found
921

922
  """
923
  full_name = fn(name)
924
  if full_name is None:
925
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
926
                               errors.ECODE_NOENT)
927
  return full_name
928

    
929

    
930
def _ExpandNodeName(cfg, name):
931
  """Wrapper over L{_ExpandItemName} for nodes."""
932
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
933

    
934

    
935
def _ExpandInstanceName(cfg, name):
936
  """Wrapper over L{_ExpandItemName} for instance."""
937
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
938

    
939

    
940
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
941
                          memory, vcpus, nics, disk_template, disks,
942
                          bep, hvp, hypervisor_name, tags):
943
  """Builds instance related env variables for hooks
944

945
  This builds the hook environment from individual variables.
946

947
  @type name: string
948
  @param name: the name of the instance
949
  @type primary_node: string
950
  @param primary_node: the name of the instance's primary node
951
  @type secondary_nodes: list
952
  @param secondary_nodes: list of secondary nodes as strings
953
  @type os_type: string
954
  @param os_type: the name of the instance's OS
955
  @type status: boolean
956
  @param status: the should_run status of the instance
957
  @type memory: string
958
  @param memory: the memory size of the instance
959
  @type vcpus: string
960
  @param vcpus: the count of VCPUs the instance has
961
  @type nics: list
962
  @param nics: list of tuples (ip, mac, mode, link) representing
963
      the NICs the instance has
964
  @type disk_template: string
965
  @param disk_template: the disk template of the instance
966
  @type disks: list
967
  @param disks: the list of (size, mode) pairs
968
  @type bep: dict
969
  @param bep: the backend parameters for the instance
970
  @type hvp: dict
971
  @param hvp: the hypervisor parameters for the instance
972
  @type hypervisor_name: string
973
  @param hypervisor_name: the hypervisor for the instance
974
  @type tags: list
975
  @param tags: list of instance tags as strings
976
  @rtype: dict
977
  @return: the hook environment for this instance
978

979
  """
980
  if status:
981
    str_status = "up"
982
  else:
983
    str_status = "down"
984
  env = {
985
    "OP_TARGET": name,
986
    "INSTANCE_NAME": name,
987
    "INSTANCE_PRIMARY": primary_node,
988
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
989
    "INSTANCE_OS_TYPE": os_type,
990
    "INSTANCE_STATUS": str_status,
991
    "INSTANCE_MEMORY": memory,
992
    "INSTANCE_VCPUS": vcpus,
993
    "INSTANCE_DISK_TEMPLATE": disk_template,
994
    "INSTANCE_HYPERVISOR": hypervisor_name,
995
  }
996

    
997
  if nics:
998
    nic_count = len(nics)
999
    for idx, (ip, mac, mode, link) in enumerate(nics):
1000
      if ip is None:
1001
        ip = ""
1002
      env["INSTANCE_NIC%d_IP" % idx] = ip
1003
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1004
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1005
      env["INSTANCE_NIC%d_LINK" % idx] = link
1006
      if mode == constants.NIC_MODE_BRIDGED:
1007
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1008
  else:
1009
    nic_count = 0
1010

    
1011
  env["INSTANCE_NIC_COUNT"] = nic_count
1012

    
1013
  if disks:
1014
    disk_count = len(disks)
1015
    for idx, (size, mode) in enumerate(disks):
1016
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1017
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1018
  else:
1019
    disk_count = 0
1020

    
1021
  env["INSTANCE_DISK_COUNT"] = disk_count
1022

    
1023
  if not tags:
1024
    tags = []
1025

    
1026
  env["INSTANCE_TAGS"] = " ".join(tags)
1027

    
1028
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1029
    for key, value in source.items():
1030
      env["INSTANCE_%s_%s" % (kind, key)] = value
1031

    
1032
  return env
1033
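# Illustrative example (not part of the original module): for an instance
# with one bridged NIC and one disk, the dictionary built above contains,
# among others, keys along these lines:
#
#   INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_STATUS,
#   INSTANCE_NIC_COUNT, INSTANCE_NIC0_MAC, INSTANCE_NIC0_BRIDGE,
#   INSTANCE_DISK_COUNT, INSTANCE_DISK0_SIZE, INSTANCE_DISK0_MODE,
#   plus one INSTANCE_BE_* / INSTANCE_HV_* entry per backend and
#   hypervisor parameter.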

    
1034

    
1035
def _NICListToTuple(lu, nics):
1036
  """Build a list of nic information tuples.
1037

1038
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1039
  value in LUInstanceQueryData.
1040

1041
  @type lu:  L{LogicalUnit}
1042
  @param lu: the logical unit on whose behalf we execute
1043
  @type nics: list of L{objects.NIC}
1044
  @param nics: list of nics to convert to hooks tuples
1045

1046
  """
1047
  hooks_nics = []
1048
  cluster = lu.cfg.GetClusterInfo()
1049
  for nic in nics:
1050
    ip = nic.ip
1051
    mac = nic.mac
1052
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1053
    mode = filled_params[constants.NIC_MODE]
1054
    link = filled_params[constants.NIC_LINK]
1055
    hooks_nics.append((ip, mac, mode, link))
1056
  return hooks_nics
1057

    
1058

    
1059
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1060
  """Builds instance related env variables for hooks from an object.
1061

1062
  @type lu: L{LogicalUnit}
1063
  @param lu: the logical unit on whose behalf we execute
1064
  @type instance: L{objects.Instance}
1065
  @param instance: the instance for which we should build the
1066
      environment
1067
  @type override: dict
1068
  @param override: dictionary with key/values that will override
1069
      our values
1070
  @rtype: dict
1071
  @return: the hook environment dictionary
1072

1073
  """
1074
  cluster = lu.cfg.GetClusterInfo()
1075
  bep = cluster.FillBE(instance)
1076
  hvp = cluster.FillHV(instance)
1077
  args = {
1078
    "name": instance.name,
1079
    "primary_node": instance.primary_node,
1080
    "secondary_nodes": instance.secondary_nodes,
1081
    "os_type": instance.os,
1082
    "status": instance.admin_up,
1083
    "memory": bep[constants.BE_MEMORY],
1084
    "vcpus": bep[constants.BE_VCPUS],
1085
    "nics": _NICListToTuple(lu, instance.nics),
1086
    "disk_template": instance.disk_template,
1087
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1088
    "bep": bep,
1089
    "hvp": hvp,
1090
    "hypervisor_name": instance.hypervisor,
1091
    "tags": instance.tags,
1092
  }
1093
  if override:
1094
    args.update(override)
1095
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1096

    
1097

    
1098
def _AdjustCandidatePool(lu, exceptions):
1099
  """Adjust the candidate pool after node operations.
1100

1101
  """
1102
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1103
  if mod_list:
1104
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1105
               utils.CommaJoin(node.name for node in mod_list))
1106
    for name in mod_list:
1107
      lu.context.ReaddNode(name)
1108
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1109
  if mc_now > mc_max:
1110
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1111
               (mc_now, mc_max))
1112

    
1113

    
1114
def _DecideSelfPromotion(lu, exceptions=None):
1115
  """Decide whether I should promote myself as a master candidate.
1116

1117
  """
1118
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1119
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1120
  # the new node will increase mc_max by one, so:
1121
  mc_should = min(mc_should + 1, cp_size)
1122
  return mc_now < mc_should
1123

    
1124

    
1125
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1126
  """Check that the brigdes needed by a list of nics exist.
1127

1128
  """
1129
  cluster = lu.cfg.GetClusterInfo()
1130
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1131
  brlist = [params[constants.NIC_LINK] for params in paramslist
1132
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1133
  if brlist:
1134
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1135
    result.Raise("Error checking bridges on destination node '%s'" %
1136
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1137

    
1138

    
1139
def _CheckInstanceBridgesExist(lu, instance, node=None):
1140
  """Check that the brigdes needed by an instance exist.
1141

1142
  """
1143
  if node is None:
1144
    node = instance.primary_node
1145
  _CheckNicsBridgesExist(lu, instance.nics, node)
1146

    
1147

    
1148
def _CheckOSVariant(os_obj, name):
1149
  """Check whether an OS name conforms to the os variants specification.
1150

1151
  @type os_obj: L{objects.OS}
1152
  @param os_obj: OS object to check
1153
  @type name: string
1154
  @param name: OS name passed by the user, to check for validity
1155

1156
  """
1157
  variant = objects.OS.GetVariant(name)
1158
  if not os_obj.supported_variants:
1159
    if variant:
1160
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1161
                                 " passed)" % (os_obj.name, variant),
1162
                                 errors.ECODE_INVAL)
1163
    return
1164
  if not variant:
1165
    raise errors.OpPrereqError("OS name must include a variant",
1166
                               errors.ECODE_INVAL)
1167

    
1168
  if variant not in os_obj.supported_variants:
1169
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1170

    
1171

    
1172
def _GetNodeInstancesInner(cfg, fn):
1173
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1174

    
1175

    
1176
def _GetNodeInstances(cfg, node_name):
1177
  """Returns a list of all primary and secondary instances on a node.
1178

1179
  """
1180

    
1181
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1182

    
1183

    
1184
def _GetNodePrimaryInstances(cfg, node_name):
1185
  """Returns primary instances on a node.
1186

1187
  """
1188
  return _GetNodeInstancesInner(cfg,
1189
                                lambda inst: node_name == inst.primary_node)
1190

    
1191

    
1192
def _GetNodeSecondaryInstances(cfg, node_name):
1193
  """Returns secondary instances on a node.
1194

1195
  """
1196
  return _GetNodeInstancesInner(cfg,
1197
                                lambda inst: node_name in inst.secondary_nodes)
1198

    
1199

    
1200
def _GetStorageTypeArgs(cfg, storage_type):
1201
  """Returns the arguments for a storage type.
1202

1203
  """
1204
  # Special case for file storage
1205
  if storage_type == constants.ST_FILE:
1206
    # storage.FileStorage wants a list of storage directories
1207
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1208

    
1209
  return []
1210

    
1211

    
1212
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1213
  faulty = []
1214

    
1215
  for dev in instance.disks:
1216
    cfg.SetDiskID(dev, node_name)
1217

    
1218
  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1219
  result.Raise("Failed to get disk status from node %s" % node_name,
1220
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1221

    
1222
  for idx, bdev_status in enumerate(result.payload):
1223
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1224
      faulty.append(idx)
1225

    
1226
  return faulty
1227

    
1228

    
1229
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1230
  """Check the sanity of iallocator and node arguments and use the
1231
  cluster-wide iallocator if appropriate.
1232

1233
  Check that at most one of (iallocator, node) is specified. If none is
1234
  specified, then the LU's opcode's iallocator slot is filled with the
1235
  cluster-wide default iallocator.
1236

1237
  @type iallocator_slot: string
1238
  @param iallocator_slot: the name of the opcode iallocator slot
1239
  @type node_slot: string
1240
  @param node_slot: the name of the opcode target node slot
1241

1242
  """
1243
  node = getattr(lu.op, node_slot, None)
1244
  iallocator = getattr(lu.op, iallocator_slot, None)
1245

    
1246
  if node is not None and iallocator is not None:
1247
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1248
                               errors.ECODE_INVAL)
1249
  elif node is None and iallocator is None:
1250
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1251
    if default_iallocator:
1252
      setattr(lu.op, iallocator_slot, default_iallocator)
1253
    else:
1254
      raise errors.OpPrereqError("No iallocator or node given and no"
1255
                                 " cluster-wide default iallocator found;"
1256
                                 " please specify either an iallocator or a"
1257
                                 " node, or set a cluster-wide default"
1258
                                 " iallocator")
1259
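# Illustrative sketch (not part of the original module): an LU whose opcode
# has "iallocator" and "remote_node" slots could validate them in
# CheckArguments like this (the slot names are examples):
#
#   def CheckArguments(self):
#     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")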

    
1260

    
1261
def _GetDefaultIAllocator(cfg, iallocator):
1262
  """Decides on which iallocator to use.
1263

1264
  @type cfg: L{config.ConfigWriter}
1265
  @param cfg: Cluster configuration object
1266
  @type iallocator: string or None
1267
  @param iallocator: Iallocator specified in opcode
1268
  @rtype: string
1269
  @return: Iallocator name
1270

1271
  """
1272
  if not iallocator:
1273
    # Use default iallocator
1274
    iallocator = cfg.GetDefaultIAllocator()
1275

    
1276
  if not iallocator:
1277
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1278
                               " opcode nor as a cluster-wide default",
1279
                               errors.ECODE_INVAL)
1280

    
1281
  return iallocator
1282

    
1283

    
1284
class LUClusterPostInit(LogicalUnit):
1285
  """Logical unit for running hooks after cluster initialization.
1286

1287
  """
1288
  HPATH = "cluster-init"
1289
  HTYPE = constants.HTYPE_CLUSTER
1290

    
1291
  def BuildHooksEnv(self):
1292
    """Build hooks env.
1293

1294
    """
1295
    return {
1296
      "OP_TARGET": self.cfg.GetClusterName(),
1297
      }
1298

    
1299
  def BuildHooksNodes(self):
1300
    """Build hooks nodes.
1301

1302
    """
1303
    return ([], [self.cfg.GetMasterNode()])
1304

    
1305
  def Exec(self, feedback_fn):
1306
    """Nothing to do.
1307

1308
    """
1309
    return True
1310

    
1311

    
1312
class LUClusterDestroy(LogicalUnit):
1313
  """Logical unit for destroying the cluster.
1314

1315
  """
1316
  HPATH = "cluster-destroy"
1317
  HTYPE = constants.HTYPE_CLUSTER
1318

    
1319
  def BuildHooksEnv(self):
1320
    """Build hooks env.
1321

1322
    """
1323
    return {
1324
      "OP_TARGET": self.cfg.GetClusterName(),
1325
      }
1326

    
1327
  def BuildHooksNodes(self):
1328
    """Build hooks nodes.
1329

1330
    """
1331
    return ([], [])
1332

    
1333
  def CheckPrereq(self):
1334
    """Check prerequisites.
1335

1336
    This checks whether the cluster is empty.
1337

1338
    Any errors are signaled by raising errors.OpPrereqError.
1339

1340
    """
1341
    master = self.cfg.GetMasterNode()
1342

    
1343
    nodelist = self.cfg.GetNodeList()
1344
    if len(nodelist) != 1 or nodelist[0] != master:
1345
      raise errors.OpPrereqError("There are still %d node(s) in"
1346
                                 " this cluster." % (len(nodelist) - 1),
1347
                                 errors.ECODE_INVAL)
1348
    instancelist = self.cfg.GetInstanceList()
1349
    if instancelist:
1350
      raise errors.OpPrereqError("There are still %d instance(s) in"
1351
                                 " this cluster." % len(instancelist),
1352
                                 errors.ECODE_INVAL)
1353

    
1354
  def Exec(self, feedback_fn):
1355
    """Destroys the cluster.
1356

1357
    """
1358
    (master, ip, dev, netmask, _) = self.cfg.GetMasterNetworkParameters()
1359

    
1360
    # Run post hooks on master node before it's removed
1361
    _RunPostHook(self, master)
1362

    
1363
    result = self.rpc.call_node_deactivate_master_ip(master, ip, netmask, dev)
1364
    result.Raise("Could not disable the master role")
1365

    
1366
    return master
1367

    
1368

    
1369
def _VerifyCertificate(filename):
1370
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1371

1372
  @type filename: string
1373
  @param filename: Path to PEM file
1374

1375
  """
1376
  try:
1377
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1378
                                           utils.ReadFile(filename))
1379
  except Exception, err: # pylint: disable=W0703
1380
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1381
            "Failed to load X509 certificate %s: %s" % (filename, err))
1382

    
1383
  (errcode, msg) = \
1384
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1385
                                constants.SSL_CERT_EXPIRATION_ERROR)
1386

    
1387
  if msg:
1388
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1389
  else:
1390
    fnamemsg = None
1391

    
1392
  if errcode is None:
1393
    return (None, fnamemsg)
1394
  elif errcode == utils.CERT_WARNING:
1395
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1396
  elif errcode == utils.CERT_ERROR:
1397
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1398

    
1399
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1400

    
1401

    
1402
def _GetAllHypervisorParameters(cluster, instances):
1403
  """Compute the set of all hypervisor parameters.
1404

1405
  @type cluster: L{objects.Cluster}
1406
  @param cluster: the cluster object
1407
  @param instances: list of L{objects.Instance}
1408
  @param instances: additional instances from which to obtain parameters
1409
  @rtype: list of (origin, hypervisor, parameters)
1410
  @return: a list with all parameters found, indicating the hypervisor they
1411
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1412

1413
  """
1414
  hvp_data = []
1415

    
1416
  for hv_name in cluster.enabled_hypervisors:
1417
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1418

    
1419
  for os_name, os_hvp in cluster.os_hvp.items():
1420
    for hv_name, hv_params in os_hvp.items():
1421
      if hv_params:
1422
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1423
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1424

    
1425
  # TODO: collapse identical parameter values in a single one
1426
  for instance in instances:
1427
    if instance.hvparams:
1428
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1429
                       cluster.FillHV(instance)))
1430

    
1431
  return hvp_data
1432

    
1433

    
1434
class _VerifyErrors(object):
1435
  """Mix-in for cluster/group verify LUs.
1436

1437
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1438
  self.op and self._feedback_fn to be available.)
1439

1440
  """
1441

    
1442
  ETYPE_FIELD = "code"
1443
  ETYPE_ERROR = "ERROR"
1444
  ETYPE_WARNING = "WARNING"
1445

    
1446
  def _Error(self, ecode, item, msg, *args, **kwargs):
1447
    """Format an error message.
1448

1449
    Based on the opcode's error_codes parameter, either format a
1450
    parseable error code, or a simpler error string.
1451

1452
    This must be called only from Exec and functions called from Exec.
1453

1454
    """
1455
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1456
    itype, etxt, _ = ecode
1457
    # first complete the msg
1458
    if args:
1459
      msg = msg % args
1460
    # then format the whole message
1461
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1462
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1463
    else:
1464
      if item:
1465
        item = " " + item
1466
      else:
1467
        item = ""
1468
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1469
    # and finally report it via the feedback_fn
1470
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1471

    
1472
  def _ErrorIf(self, cond, ecode, *args, **kwargs):
1473
    """Log an error message if the passed condition is True.
1474

1475
    """
1476
    cond = (bool(cond)
1477
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1478

    
1479
    # If the error code is in the list of ignored errors, demote the error to a
1480
    # warning
1481
    (_, etxt, _) = ecode
1482
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1483
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1484

    
1485
    if cond:
1486
      self._Error(ecode, *args, **kwargs)
1487

    
1488
    # do not mark the operation as failed for WARN cases only
1489
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1490
      self.bad = self.bad or cond
1491
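# Illustrative sketch (not part of the original module): inside the Exec of a
# verify LU mixing in _VerifyErrors, problems are typically reported like
# this ("test_failed" and "details" are example names):
#
#   self._ErrorIf(test_failed, constants.CV_ECLUSTERCFG, None,
#                 "configuration check failed: %s", details)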

    
1492

    
1493
class LUClusterVerify(NoHooksLU):
1494
  """Submits all jobs necessary to verify the cluster.
1495

1496
  """
1497
  REQ_BGL = False
1498

    
1499
  def ExpandNames(self):
1500
    self.needed_locks = {}
1501

    
1502
  def Exec(self, feedback_fn):
1503
    jobs = []
1504

    
1505
    if self.op.group_name:
1506
      groups = [self.op.group_name]
1507
      depends_fn = lambda: None
1508
    else:
1509
      groups = self.cfg.GetNodeGroupList()
1510

    
1511
      # Verify global configuration
1512
      jobs.append([
1513
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1514
        ])
1515

    
1516
      # Always depend on global verification
1517
      depends_fn = lambda: [(-len(jobs), [])]
1518

    
1519
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1520
                                            ignore_errors=self.op.ignore_errors,
1521
                                            depends=depends_fn())]
1522
                for group in groups)
1523

    
1524
    # Fix up all parameters
1525
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1526
      op.debug_simulate_errors = self.op.debug_simulate_errors
1527
      op.verbose = self.op.verbose
1528
      op.error_codes = self.op.error_codes
1529
      try:
1530
        op.skip_checks = self.op.skip_checks
1531
      except AttributeError:
1532
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1533

    
1534
    return ResultWithJobs(jobs)
1535

    
1536

    
1537
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1538
  """Verifies the cluster config.
1539

1540
  """
1541
  REQ_BGL = True
1542

    
1543
  def _VerifyHVP(self, hvp_data):
1544
    """Verifies locally the syntax of the hypervisor parameters.
1545

1546
    """
1547
    for item, hv_name, hv_params in hvp_data:
1548
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1549
             (hv_name, item))
1550
      try:
1551
        hv_class = hypervisor.GetHypervisor(hv_name)
1552
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1553
        hv_class.CheckParameterSyntax(hv_params)
1554
      except errors.GenericError, err:
1555
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1556

    
1557
  def ExpandNames(self):
1558
    # Information can be safely retrieved as the BGL is acquired in exclusive
1559
    # mode
1560
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1561
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1562
    self.all_node_info = self.cfg.GetAllNodesInfo()
1563
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1564
    self.needed_locks = {}
1565

    
1566
  def Exec(self, feedback_fn):
1567
    """Verify integrity of cluster, performing various test on nodes.
1568

1569
    """
1570
    self.bad = False
1571
    self._feedback_fn = feedback_fn
1572

    
1573
    feedback_fn("* Verifying cluster config")
1574

    
1575
    for msg in self.cfg.VerifyConfig():
1576
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1577

    
1578
    feedback_fn("* Verifying cluster certificate files")
1579

    
1580
    for cert_filename in constants.ALL_CERT_FILES:
1581
      (errcode, msg) = _VerifyCertificate(cert_filename)
1582
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1583

    
1584
    feedback_fn("* Verifying hypervisor parameters")
1585

    
1586
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1587
                                                self.all_inst_info.values()))
1588

    
1589
    feedback_fn("* Verifying all nodes belong to an existing group")
1590

    
1591
    # We do this verification here because, should this bogus circumstance
1592
    # occur, it would never be caught by VerifyGroup, which only acts on
1593
    # nodes/instances reachable from existing node groups.
1594

    
1595
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1596
                         if node.group not in self.all_group_info)
1597

    
1598
    dangling_instances = {}
1599
    no_node_instances = []
1600

    
1601
    for inst in self.all_inst_info.values():
1602
      if inst.primary_node in dangling_nodes:
1603
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1604
      elif inst.primary_node not in self.all_node_info:
1605
        no_node_instances.append(inst.name)
1606

    
1607
    pretty_dangling = [
1608
        "%s (%s)" %
1609
        (node.name,
1610
         utils.CommaJoin(dangling_instances.get(node.name,
1611
                                                ["no instances"])))
1612
        for node in dangling_nodes]
1613

    
1614
    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1615
                  None,
1616
                  "the following nodes (and their instances) belong to a non"
1617
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1618

    
1619
    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1620
                  None,
1621
                  "the following instances have a non-existing primary-node:"
1622
                  " %s", utils.CommaJoin(no_node_instances))
1623

    
1624
    return not self.bad
1625

    
1626

    
1627
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1628
  """Verifies the status of a node group.
1629

1630
  """
1631
  HPATH = "cluster-verify"
1632
  HTYPE = constants.HTYPE_CLUSTER
1633
  REQ_BGL = False
1634

    
1635
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1636

    
1637
  class NodeImage(object):
1638
    """A class representing the logical and physical status of a node.
1639

1640
    @type name: string
1641
    @ivar name: the node name to which this object refers
1642
    @ivar volumes: a structure as returned from
1643
        L{ganeti.backend.GetVolumeList} (runtime)
1644
    @ivar instances: a list of running instances (runtime)
1645
    @ivar pinst: list of configured primary instances (config)
1646
    @ivar sinst: list of configured secondary instances (config)
1647
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1648
        instances for which this node is secondary (config)
1649
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1650
    @ivar dfree: free disk, as reported by the node (runtime)
1651
    @ivar offline: the offline status (config)
1652
    @type rpc_fail: boolean
1653
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1654
        not whether the individual keys were correct) (runtime)
1655
    @type lvm_fail: boolean
1656
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1657
    @type hyp_fail: boolean
1658
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1659
    @type ghost: boolean
1660
    @ivar ghost: whether this node is unknown to the configuration (config)
1661
    @type os_fail: boolean
1662
    @ivar os_fail: whether the RPC call didn't return valid OS data
1663
    @type oslist: list
1664
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1665
    @type vm_capable: boolean
1666
    @ivar vm_capable: whether the node can host instances
1667

1668
    """
1669
    def __init__(self, offline=False, name=None, vm_capable=True):
1670
      self.name = name
1671
      self.volumes = {}
1672
      self.instances = []
1673
      self.pinst = []
1674
      self.sinst = []
1675
      self.sbp = {}
1676
      self.mfree = 0
1677
      self.dfree = 0
1678
      self.offline = offline
1679
      self.vm_capable = vm_capable
1680
      self.rpc_fail = False
1681
      self.lvm_fail = False
1682
      self.hyp_fail = False
1683
      self.ghost = False
1684
      self.os_fail = False
1685
      self.oslist = {}
1686

    
1687
  def ExpandNames(self):
1688
    # This raises errors.OpPrereqError on its own:
1689
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1690

    
1691
    # Get instances in node group; this is unsafe and needs verification later
1692
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1693

    
1694
    self.needed_locks = {
1695
      locking.LEVEL_INSTANCE: inst_names,
1696
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1697
      locking.LEVEL_NODE: [],
1698
      }
1699

    
1700
    self.share_locks = _ShareAll()
1701

    
1702
  def DeclareLocks(self, level):
1703
    if level == locking.LEVEL_NODE:
1704
      # Get members of node group; this is unsafe and needs verification later
1705
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1706

    
1707
      all_inst_info = self.cfg.GetAllInstancesInfo()
1708

    
1709
      # In Exec(), we warn about mirrored instances that have primary and
1710
      # secondary living in separate node groups. To fully verify that
1711
      # volumes for these instances are healthy, we will need to do an
1712
      # extra call to their secondaries. We ensure here those nodes will
1713
      # be locked.
1714
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1715
        # Important: access only the instances whose lock is owned
1716
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1717
          nodes.update(all_inst_info[inst].secondary_nodes)
1718

    
1719
      self.needed_locks[locking.LEVEL_NODE] = nodes
1720

    
1721
  def CheckPrereq(self):
1722
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1723
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1724

    
1725
    group_nodes = set(self.group_info.members)
1726
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1727

    
1728
    unlocked_nodes = \
1729
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1730

    
1731
    unlocked_instances = \
1732
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1733

    
1734
    if unlocked_nodes:
1735
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1736
                                 utils.CommaJoin(unlocked_nodes))
1737

    
1738
    if unlocked_instances:
1739
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1740
                                 utils.CommaJoin(unlocked_instances))
1741

    
1742
    self.all_node_info = self.cfg.GetAllNodesInfo()
1743
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1744

    
1745
    self.my_node_names = utils.NiceSort(group_nodes)
1746
    self.my_inst_names = utils.NiceSort(group_instances)
1747

    
1748
    self.my_node_info = dict((name, self.all_node_info[name])
1749
                             for name in self.my_node_names)
1750

    
1751
    self.my_inst_info = dict((name, self.all_inst_info[name])
1752
                             for name in self.my_inst_names)
1753

    
1754
    # We detect here the nodes that will need the extra RPC calls for verifying
1755
    # split LV volumes; they should be locked.
1756
    extra_lv_nodes = set()
1757

    
1758
    for inst in self.my_inst_info.values():
1759
      if inst.disk_template in constants.DTS_INT_MIRROR:
1760
        group = self.my_node_info[inst.primary_node].group
1761
        for nname in inst.secondary_nodes:
1762
          if self.all_node_info[nname].group != group:
1763
            extra_lv_nodes.add(nname)
1764

    
1765
    unlocked_lv_nodes = \
1766
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1767

    
1768
    if unlocked_lv_nodes:
1769
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1770
                                 utils.CommaJoin(unlocked_lv_nodes))
1771
    self.extra_lv_nodes = list(extra_lv_nodes)
1772

    
1773
  def _VerifyNode(self, ninfo, nresult):
1774
    """Perform some basic validation on data returned from a node.
1775

1776
      - check the result data structure is well formed and has all the
1777
        mandatory fields
1778
      - check ganeti version
1779

1780
    @type ninfo: L{objects.Node}
1781
    @param ninfo: the node to check
1782
    @param nresult: the results from the node
1783
    @rtype: boolean
1784
    @return: whether overall this call was successful (and we can expect
1785
         reasonable values in the response)
1786

1787
    """
1788
    node = ninfo.name
1789
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1790

    
1791
    # main result, nresult should be a non-empty dict
1792
    test = not nresult or not isinstance(nresult, dict)
1793
    _ErrorIf(test, constants.CV_ENODERPC, node,
1794
                  "unable to verify node: no data returned")
1795
    if test:
1796
      return False
1797

    
1798
    # compares ganeti version
1799
    local_version = constants.PROTOCOL_VERSION
1800
    remote_version = nresult.get("version", None)
1801
    test = not (remote_version and
1802
                isinstance(remote_version, (list, tuple)) and
1803
                len(remote_version) == 2)
1804
    _ErrorIf(test, constants.CV_ENODERPC, node,
1805
             "connection to node returned invalid data")
1806
    if test:
1807
      return False
1808

    
1809
    test = local_version != remote_version[0]
1810
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
1811
             "incompatible protocol versions: master %s,"
1812
             " node %s", local_version, remote_version[0])
1813
    if test:
1814
      return False
1815

    
1816
    # node seems compatible, we can actually try to look into its results
1817

    
1818
    # full package version
1819
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1820
                  constants.CV_ENODEVERSION, node,
1821
                  "software version mismatch: master %s, node %s",
1822
                  constants.RELEASE_VERSION, remote_version[1],
1823
                  code=self.ETYPE_WARNING)
1824

    
1825
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1826
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1827
      for hv_name, hv_result in hyp_result.iteritems():
1828
        test = hv_result is not None
1829
        _ErrorIf(test, constants.CV_ENODEHV, node,
1830
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1831

    
1832
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1833
    if ninfo.vm_capable and isinstance(hvp_result, list):
1834
      for item, hv_name, hv_result in hvp_result:
1835
        _ErrorIf(True, constants.CV_ENODEHV, node,
1836
                 "hypervisor %s parameter verify failure (source %s): %s",
1837
                 hv_name, item, hv_result)
1838

    
1839
    test = nresult.get(constants.NV_NODESETUP,
1840
                       ["Missing NODESETUP results"])
1841
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1842
             "; ".join(test))
1843

    
1844
    return True
1845

    
1846
  def _VerifyNodeTime(self, ninfo, nresult,
1847
                      nvinfo_starttime, nvinfo_endtime):
1848
    """Check the node time.
1849

1850
    @type ninfo: L{objects.Node}
1851
    @param ninfo: the node to check
1852
    @param nresult: the remote results for the node
1853
    @param nvinfo_starttime: the start time of the RPC call
1854
    @param nvinfo_endtime: the end time of the RPC call
1855

1856
    """
1857
    node = ninfo.name
1858
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1859

    
1860
    ntime = nresult.get(constants.NV_TIME, None)
1861
    try:
1862
      ntime_merged = utils.MergeTime(ntime)
1863
    except (ValueError, TypeError):
1864
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1865
      return
1866

    
1867
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1868
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1869
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1870
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1871
    else:
1872
      ntime_diff = None
1873

    
1874
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1875
             "Node time diverges by at least %s from master node time",
1876
             ntime_diff)
1877

    
1878
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1879
    """Check the node LVM results.
1880

1881
    @type ninfo: L{objects.Node}
1882
    @param ninfo: the node to check
1883
    @param nresult: the remote results for the node
1884
    @param vg_name: the configured VG name
1885

1886
    """
1887
    if vg_name is None:
1888
      return
1889

    
1890
    node = ninfo.name
1891
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1892

    
1893
    # checks vg existence and size > 20G
1894
    vglist = nresult.get(constants.NV_VGLIST, None)
1895
    test = not vglist
1896
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1897
    if not test:
1898
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1899
                                            constants.MIN_VG_SIZE)
1900
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1901

    
1902
    # check pv names
1903
    pvlist = nresult.get(constants.NV_PVLIST, None)
1904
    test = pvlist is None
1905
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1906
    if not test:
1907
      # check that ':' is not present in PV names, since it's a
1908
      # special character for lvcreate (denotes the range of PEs to
1909
      # use on the PV)
1910
      for _, pvname, owner_vg in pvlist:
1911
        test = ":" in pvname
1912
        _ErrorIf(test, constants.CV_ENODELVM, node,
1913
                 "Invalid character ':' in PV '%s' of VG '%s'",
1914
                 pvname, owner_vg)
1915

    
1916
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1917
    """Check the node bridges.
1918

1919
    @type ninfo: L{objects.Node}
1920
    @param ninfo: the node to check
1921
    @param nresult: the remote results for the node
1922
    @param bridges: the expected list of bridges
1923

1924
    """
1925
    if not bridges:
1926
      return
1927

    
1928
    node = ninfo.name
1929
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1930

    
1931
    missing = nresult.get(constants.NV_BRIDGES, None)
1932
    test = not isinstance(missing, list)
1933
    _ErrorIf(test, constants.CV_ENODENET, node,
1934
             "did not return valid bridge information")
1935
    if not test:
1936
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1937
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1938

    
1939
  def _VerifyNodeNetwork(self, ninfo, nresult):
1940
    """Check the node network connectivity results.
1941

1942
    @type ninfo: L{objects.Node}
1943
    @param ninfo: the node to check
1944
    @param nresult: the remote results for the node
1945

1946
    """
1947
    node = ninfo.name
1948
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1949

    
1950
    test = constants.NV_NODELIST not in nresult
1951
    _ErrorIf(test, constants.CV_ENODESSH, node,
1952
             "node hasn't returned node ssh connectivity data")
1953
    if not test:
1954
      if nresult[constants.NV_NODELIST]:
1955
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1956
          _ErrorIf(True, constants.CV_ENODESSH, node,
1957
                   "ssh communication with node '%s': %s", a_node, a_msg)
1958

    
1959
    test = constants.NV_NODENETTEST not in nresult
1960
    _ErrorIf(test, constants.CV_ENODENET, node,
1961
             "node hasn't returned node tcp connectivity data")
1962
    if not test:
1963
      if nresult[constants.NV_NODENETTEST]:
1964
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1965
        for anode in nlist:
1966
          _ErrorIf(True, constants.CV_ENODENET, node,
1967
                   "tcp communication with node '%s': %s",
1968
                   anode, nresult[constants.NV_NODENETTEST][anode])
1969

    
1970
    test = constants.NV_MASTERIP not in nresult
1971
    _ErrorIf(test, constants.CV_ENODENET, node,
1972
             "node hasn't returned node master IP reachability data")
1973
    if not test:
1974
      if not nresult[constants.NV_MASTERIP]:
1975
        if node == self.master_node:
1976
          msg = "the master node cannot reach the master IP (not configured?)"
1977
        else:
1978
          msg = "cannot reach the master IP"
1979
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
1980

    
1981
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1982
                      diskstatus):
1983
    """Verify an instance.
1984

1985
    This function checks to see if the required block devices are
1986
    available on the instance's node.
1987

1988
    """
1989
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1990
    node_current = instanceconfig.primary_node
1991

    
1992
    node_vol_should = {}
1993
    instanceconfig.MapLVsByNode(node_vol_should)
1994

    
1995
    for node in node_vol_should:
1996
      n_img = node_image[node]
1997
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1998
        # ignore missing volumes on offline or broken nodes
1999
        continue
2000
      for volume in node_vol_should[node]:
2001
        test = volume not in n_img.volumes
2002
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2003
                 "volume %s missing on node %s", volume, node)
2004

    
2005
    if instanceconfig.admin_up:
2006
      pri_img = node_image[node_current]
2007
      test = instance not in pri_img.instances and not pri_img.offline
2008
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2009
               "instance not running on its primary node %s",
2010
               node_current)
2011

    
2012
    diskdata = [(nname, success, status, idx)
2013
                for (nname, disks) in diskstatus.items()
2014
                for idx, (success, status) in enumerate(disks)]
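    # diskdata is now a flat list of (node_name, success, bdev_status,
    # disk_index) tuples, one entry per disk of this instance on each node
    # present in diskstatus.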
2015

    
2016
    for nname, success, bdev_status, idx in diskdata:
2017
      # the 'ghost node' construction in Exec() ensures that we have a
2018
      # node here
2019
      snode = node_image[nname]
2020
      bad_snode = snode.ghost or snode.offline
2021
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2022
               constants.CV_EINSTANCEFAULTYDISK, instance,
2023
               "couldn't retrieve status for disk/%s on %s: %s",
2024
               idx, nname, bdev_status)
2025
      _ErrorIf((instanceconfig.admin_up and success and
2026
                bdev_status.ldisk_status == constants.LDS_FAULTY),
2027
               constants.CV_EINSTANCEFAULTYDISK, instance,
2028
               "disk/%s on %s is faulty", idx, nname)
2029

    
2030
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2031
    """Verify if there are any unknown volumes in the cluster.
2032

2033
    The .os, .swap and backup volumes are ignored. All other volumes are
2034
    reported as unknown.
2035

2036
    @type reserved: L{ganeti.utils.FieldSet}
2037
    @param reserved: a FieldSet of reserved volume names
2038

2039
    """
2040
    for node, n_img in node_image.items():
2041
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2042
        # skip non-healthy nodes
2043
        continue
2044
      for volume in n_img.volumes:
2045
        test = ((node not in node_vol_should or
2046
                volume not in node_vol_should[node]) and
2047
                not reserved.Matches(volume))
2048
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2049
                      "volume %s is unknown", volume)
2050

    
2051
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2052
    """Verify N+1 Memory Resilience.
2053

2054
    Check that if one single node dies we can still start all the
2055
    instances it was primary for.
2056

2057
    """
2058
    cluster_info = self.cfg.GetClusterInfo()
2059
    for node, n_img in node_image.items():
2060
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all the instances it would
      # have to take over should a single other node in the cluster fail.
2063
      # FIXME: not ready for failover to an arbitrary node
2064
      # FIXME: does not support file-backed instances
2065
      # WARNING: we currently take into account down instances as well
2066
      # as up ones, considering that even if they're down someone
2067
      # might want to start them even in the event of a node failure.
2068
      if n_img.offline:
2069
        # we're skipping offline nodes from the N+1 warning, since
2070
        # most likely we don't have good memory information from them;
2071
        # we already list instances living on such nodes, and that's
2072
        # enough warning
2073
        continue
2074
      for prinode, instances in n_img.sbp.items():
2075
        needed_mem = 0
2076
        for instance in instances:
2077
          bep = cluster_info.FillBE(instance_cfg[instance])
2078
          if bep[constants.BE_AUTO_BALANCE]:
2079
            needed_mem += bep[constants.BE_MEMORY]
2080
        test = n_img.mfree < needed_mem
2081
        self._ErrorIf(test, constants.CV_ENODEN1, node,
2082
                      "not enough memory to accomodate instance failovers"
2083
                      " should node %s fail (%dMiB needed, %dMiB available)",
2084
                      prinode, needed_mem, n_img.mfree)
2085

    
2086
  @classmethod
2087
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2088
                   (files_all, files_opt, files_mc, files_vm)):
2089
    """Verifies file checksums collected from all nodes.
2090

2091
    @param errorif: Callback for reporting errors
2092
    @param nodeinfo: List of L{objects.Node} objects
2093
    @param master_node: Name of master node
2094
    @param all_nvinfo: RPC results
2095

2096
    """
2097
    # Define functions determining which nodes to consider for a file
2098
    files2nodefn = [
2099
      (files_all, None),
2100
      (files_mc, lambda node: (node.master_candidate or
2101
                               node.name == master_node)),
2102
      (files_vm, lambda node: node.vm_capable),
2103
      ]
2104

    
2105
    # Build mapping from filename to list of nodes which should have the file
2106
    nodefiles = {}
2107
    for (files, fn) in files2nodefn:
2108
      if fn is None:
2109
        filenodes = nodeinfo
2110
      else:
2111
        filenodes = filter(fn, nodeinfo)
2112
      nodefiles.update((filename,
2113
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2114
                       for filename in files)
2115

    
2116
    assert set(nodefiles) == (files_all | files_mc | files_vm)
2117

    
2118
    fileinfo = dict((filename, {}) for filename in nodefiles)
2119
    ignore_nodes = set()
2120

    
2121
    for node in nodeinfo:
2122
      if node.offline:
2123
        ignore_nodes.add(node.name)
2124
        continue
2125

    
2126
      nresult = all_nvinfo[node.name]
2127

    
2128
      if nresult.fail_msg or not nresult.payload:
2129
        node_files = None
2130
      else:
2131
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2132

    
2133
      test = not (node_files and isinstance(node_files, dict))
2134
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
2135
              "Node did not return file checksum data")
2136
      if test:
2137
        ignore_nodes.add(node.name)
2138
        continue
2139

    
2140
      # Build per-checksum mapping from filename to nodes having it
2141
      for (filename, checksum) in node_files.items():
2142
        assert filename in nodefiles
2143
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2144

    
2145
    for (filename, checksums) in fileinfo.items():
2146
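      # 'checksums' maps each checksum string to the set of node names that
      # reported it; the assertion below sanity-checks that the recorded
      # checksums look like real hash values.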
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2147

    
2148
      # Nodes having the file
2149
      with_file = frozenset(node_name
2150
                            for nodes in fileinfo[filename].values()
2151
                            for node_name in nodes) - ignore_nodes
2152

    
2153
      expected_nodes = nodefiles[filename] - ignore_nodes
2154

    
2155
      # Nodes missing file
2156
      missing_file = expected_nodes - with_file
2157

    
2158
      if filename in files_opt:
2159
        # All or no nodes
2160
        errorif(missing_file and missing_file != expected_nodes,
2161
                constants.CV_ECLUSTERFILECHECK, None,
2162
                "File %s is optional, but it must exist on all or no"
2163
                " nodes (not found on %s)",
2164
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2165
      else:
2166
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2167
                "File %s is missing from node(s) %s", filename,
2168
                utils.CommaJoin(utils.NiceSort(missing_file)))
2169

    
2170
        # Warn if a node has a file it shouldn't
2171
        unexpected = with_file - expected_nodes
2172
        errorif(unexpected,
2173
                constants.CV_ECLUSTERFILECHECK, None,
2174
                "File %s should not exist on node(s) %s",
2175
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2176

    
2177
      # See if there are multiple versions of the file
2178
      test = len(checksums) > 1
2179
      if test:
2180
        variants = ["variant %s on %s" %
2181
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2182
                    for (idx, (checksum, nodes)) in
2183
                      enumerate(sorted(checksums.items()))]
2184
      else:
2185
        variants = []
2186

    
2187
      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2188
              "File %s found with %s different checksums (%s)",
2189
              filename, len(checksums), "; ".join(variants))
2190

    
2191
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2192
                      drbd_map):
2193
    """Verifies and the node DRBD status.
2194

2195
    @type ninfo: L{objects.Node}
2196
    @param ninfo: the node to check
2197
    @param nresult: the remote results for the node
2198
    @param instanceinfo: the dict of instances
2199
    @param drbd_helper: the configured DRBD usermode helper
2200
    @param drbd_map: the DRBD map as returned by
2201
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2202

2203
    """
2204
    node = ninfo.name
2205
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2206

    
2207
    if drbd_helper:
2208
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2209
      test = (helper_result is None)
2210
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2211
               "no drbd usermode helper returned")
2212
      if helper_result:
2213
        status, payload = helper_result
2214
        test = not status
2215
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2216
                 "drbd usermode helper check unsuccessful: %s", payload)
2217
        test = status and (payload != drbd_helper)
2218
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2219
                 "wrong drbd usermode helper: %s", payload)
2220

    
2221
    # compute the DRBD minors
2222
    node_drbd = {}
2223
    for minor, instance in drbd_map[node].items():
2224
      test = instance not in instanceinfo
2225
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2226
               "ghost instance '%s' in temporary DRBD map", instance)
2227
        # ghost instance should not be running, but otherwise we
2228
        # don't give double warnings (both ghost instance and
2229
        # unallocated minor in use)
2230
      if test:
2231
        node_drbd[minor] = (instance, False)
2232
      else:
2233
        instance = instanceinfo[instance]
2234
        node_drbd[minor] = (instance.name, instance.admin_up)
2235

    
2236
    # and now check them
2237
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2238
    test = not isinstance(used_minors, (tuple, list))
2239
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
2240
             "cannot parse drbd status file: %s", str(used_minors))
2241
    if test:
2242
      # we cannot check drbd status
2243
      return
2244

    
2245
    for minor, (iname, must_exist) in node_drbd.items():
2246
      test = minor not in used_minors and must_exist
2247
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2248
               "drbd minor %d of instance %s is not active", minor, iname)
2249
    for minor in used_minors:
2250
      test = minor not in node_drbd
2251
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2252
               "unallocated drbd minor %d is in use", minor)
2253

    
2254
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2255
    """Builds the node OS structures.
2256

2257
    @type ninfo: L{objects.Node}
2258
    @param ninfo: the node to check
2259
    @param nresult: the remote results for the node
2260
    @param nimg: the node image object
2261

2262
    """
2263
    node = ninfo.name
2264
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2265

    
2266
    remote_os = nresult.get(constants.NV_OSLIST, None)
2267
    test = (not isinstance(remote_os, list) or
2268
            not compat.all(isinstance(v, list) and len(v) == 7
2269
                           for v in remote_os))
2270

    
2271
    _ErrorIf(test, constants.CV_ENODEOS, node,
2272
             "node hasn't returned valid OS data")
2273

    
2274
    nimg.os_fail = test
2275

    
2276
    if test:
2277
      return
2278

    
2279
    os_dict = {}
2280

    
2281
    for (name, os_path, status, diagnose,
2282
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2283

    
2284
      if name not in os_dict:
2285
        os_dict[name] = []
2286

    
2287
      # parameters is a list of lists instead of list of tuples due to
2288
      # JSON lacking a real tuple type, fix it:
2289
      parameters = [tuple(v) for v in parameters]
2290
      os_dict[name].append((os_path, status, diagnose,
2291
                            set(variants), set(parameters), set(api_ver)))
2292

    
2293
    nimg.oslist = os_dict
2294

    
2295
  def _VerifyNodeOS(self, ninfo, nimg, base):
2296
    """Verifies the node OS list.
2297

2298
    @type ninfo: L{objects.Node}
2299
    @param ninfo: the node to check
2300
    @param nimg: the node image object
2301
    @param base: the 'template' node we match against (e.g. from the master)
2302

2303
    """
2304
    node = ninfo.name
2305
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2306

    
2307
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2308

    
2309
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2310
    for os_name, os_data in nimg.oslist.items():
2311
      assert os_data, "Empty OS status for OS %s?!" % os_name
2312
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2313
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2314
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2315
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2316
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2317
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2318
      # comparisons with the 'base' image
2319
      test = os_name not in base.oslist
2320
      _ErrorIf(test, constants.CV_ENODEOS, node,
2321
               "Extra OS %s not present on reference node (%s)",
2322
               os_name, base.name)
2323
      if test:
2324
        continue
2325
      assert base.oslist[os_name], "Base node has empty OS status?"
2326
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2327
      if not b_status:
2328
        # base OS is invalid, skipping
2329
        continue
2330
      for kind, a, b in [("API version", f_api, b_api),
2331
                         ("variants list", f_var, b_var),
2332
                         ("parameters", beautify_params(f_param),
2333
                          beautify_params(b_param))]:
2334
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
2335
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2336
                 kind, os_name, base.name,
2337
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2338

    
2339
    # check any missing OSes
2340
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2341
    _ErrorIf(missing, constants.CV_ENODEOS, node,
2342
             "OSes present on reference node %s but missing on this node: %s",
2343
             base.name, utils.CommaJoin(missing))
2344

    
2345
  def _VerifyOob(self, ninfo, nresult):
2346
    """Verifies out of band functionality of a node.
2347

2348
    @type ninfo: L{objects.Node}
2349
    @param ninfo: the node to check
2350
    @param nresult: the remote results for the node
2351

2352
    """
2353
    node = ninfo.name
2354
    # We just have to verify the paths on master and/or master candidates
2355
    # as the oob helper is invoked on the master
2356
    if ((ninfo.master_candidate or ninfo.master_capable) and
2357
        constants.NV_OOB_PATHS in nresult):
2358
      for path_result in nresult[constants.NV_OOB_PATHS]:
2359
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2360

    
2361
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2362
    """Verifies and updates the node volume data.
2363

2364
    This function will update a L{NodeImage}'s internal structures
2365
    with data from the remote call.
2366

2367
    @type ninfo: L{objects.Node}
2368
    @param ninfo: the node to check
2369
    @param nresult: the remote results for the node
2370
    @param nimg: the node image object
2371
    @param vg_name: the configured VG name
2372

2373
    """
2374
    node = ninfo.name
2375
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2376

    
2377
    nimg.lvm_fail = True
2378
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2379
    if vg_name is None:
2380
      pass
2381
    elif isinstance(lvdata, basestring):
2382
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2383
               utils.SafeEncode(lvdata))
2384
    elif not isinstance(lvdata, dict):
2385
      _ErrorIf(True, constants.CV_ENODELVM, node,
2386
               "rpc call to node failed (lvlist)")
2387
    else:
2388
      nimg.volumes = lvdata
2389
      nimg.lvm_fail = False
2390

    
2391
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2392
    """Verifies and updates the node instance list.
2393

2394
    If the listing was successful, then updates this node's instance
2395
    list. Otherwise, it marks the RPC call as failed for the instance
2396
    list key.
2397

2398
    @type ninfo: L{objects.Node}
2399
    @param ninfo: the node to check
2400
    @param nresult: the remote results for the node
2401
    @param nimg: the node image object
2402

2403
    """
2404
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2405
    test = not isinstance(idata, list)
2406
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2407
                  "rpc call to node failed (instancelist): %s",
2408
                  utils.SafeEncode(str(idata)))
2409
    if test:
2410
      nimg.hyp_fail = True
2411
    else:
2412
      nimg.instances = idata
2413

    
2414
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2415
    """Verifies and computes a node information map
2416

2417
    @type ninfo: L{objects.Node}
2418
    @param ninfo: the node to check
2419
    @param nresult: the remote results for the node
2420
    @param nimg: the node image object
2421
    @param vg_name: the configured VG name
2422

2423
    """
2424
    node = ninfo.name
2425
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2426

    
2427
    # try to read free memory (from the hypervisor)
2428
    hv_info = nresult.get(constants.NV_HVINFO, None)
2429
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2430
    _ErrorIf(test, constants.CV_ENODEHV, node,
2431
             "rpc call to node failed (hvinfo)")
2432
    if not test:
2433
      try:
2434
        nimg.mfree = int(hv_info["memory_free"])
2435
      except (ValueError, TypeError):
2436
        _ErrorIf(True, constants.CV_ENODERPC, node,
2437
                 "node returned invalid nodeinfo, check hypervisor")
2438

    
2439
    # FIXME: devise a free space model for file based instances as well
2440
    if vg_name is not None:
2441
      test = (constants.NV_VGLIST not in nresult or
2442
              vg_name not in nresult[constants.NV_VGLIST])
2443
      _ErrorIf(test, constants.CV_ENODELVM, node,
2444
               "node didn't return data for the volume group '%s'"
2445
               " - it is either missing or broken", vg_name)
2446
      if not test:
2447
        try:
2448
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2449
        except (ValueError, TypeError):
2450
          _ErrorIf(True, constants.CV_ENODERPC, node,
2451
                   "node returned invalid LVM info, check LVM status")
2452

    
2453
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2454
    """Gets per-disk status information for all instances.
2455

2456
    @type nodelist: list of strings
2457
    @param nodelist: Node names
2458
    @type node_image: dict of (name, L{objects.Node})
2459
    @param node_image: Node objects
2460
    @type instanceinfo: dict of (name, L{objects.Instance})
2461
    @param instanceinfo: Instance objects
2462
    @rtype: {instance: {node: [(success, payload)]}}
2463
    @return: a dictionary of per-instance dictionaries with nodes as
2464
        keys and disk information as values; the disk information is a
2465
        list of tuples (success, payload)
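        (illustrative example: {"inst1": {"nodeA": [(True, payload), ...]}})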
2466

2467
    """
2468
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2469

    
2470
    node_disks = {}
2471
    node_disks_devonly = {}
2472
    diskless_instances = set()
2473
    diskless = constants.DT_DISKLESS
2474

    
2475
    for nname in nodelist:
2476
      node_instances = list(itertools.chain(node_image[nname].pinst,
2477
                                            node_image[nname].sinst))
2478
      diskless_instances.update(inst for inst in node_instances
2479
                                if instanceinfo[inst].disk_template == diskless)
2480
      disks = [(inst, disk)
2481
               for inst in node_instances
2482
               for disk in instanceinfo[inst].disks]
2483

    
2484
      if not disks:
2485
        # No need to collect data
2486
        continue
2487

    
2488
      node_disks[nname] = disks
2489

    
2490
      # Creating copies as SetDiskID below will modify the objects and that can
2491
      # lead to incorrect data returned from nodes
2492
      devonly = [dev.Copy() for (_, dev) in disks]
2493

    
2494
      for dev in devonly:
2495
        self.cfg.SetDiskID(dev, nname)
2496

    
2497
      node_disks_devonly[nname] = devonly
2498

    
2499
    assert len(node_disks) == len(node_disks_devonly)
2500

    
2501
    # Collect data from all nodes with disks
2502
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2503
                                                          node_disks_devonly)
2504

    
2505
    assert len(result) == len(node_disks)
2506

    
2507
    instdisk = {}
2508

    
2509
    for (nname, nres) in result.items():
2510
      disks = node_disks[nname]
2511

    
2512
      if nres.offline:
2513
        # No data from this node
2514
        data = len(disks) * [(False, "node offline")]
2515
      else:
2516
        msg = nres.fail_msg
2517
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
2518
                 "while getting disk information: %s", msg)
2519
        if msg:
2520
          # No data from this node
2521
          data = len(disks) * [(False, msg)]
2522
        else:
2523
          data = []
2524
          for idx, i in enumerate(nres.payload):
2525
            if isinstance(i, (tuple, list)) and len(i) == 2:
2526
              data.append(i)
2527
            else:
2528
              logging.warning("Invalid result from node %s, entry %d: %s",
2529
                              nname, idx, i)
2530
              data.append((False, "Invalid result from the remote node"))
2531

    
2532
      for ((inst, _), status) in zip(disks, data):
2533
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2534

    
2535
    # Add empty entries for diskless instances.
2536
    for inst in diskless_instances:
2537
      assert inst not in instdisk
2538
      instdisk[inst] = {}
2539

    
2540
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2541
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2542
                      compat.all(isinstance(s, (tuple, list)) and
2543
                                 len(s) == 2 for s in statuses)
2544
                      for inst, nnames in instdisk.items()
2545
                      for nname, statuses in nnames.items())
2546
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2547

    
2548
    return instdisk
2549

    
2550
  @staticmethod
2551
  def _SshNodeSelector(group_uuid, all_nodes):
2552
    """Create endless iterators for all potential SSH check hosts.
2553

2554
    """
2555
    nodes = [node for node in all_nodes
2556
             if (node.group != group_uuid and
2557
                 not node.offline)]
2558
    keyfunc = operator.attrgetter("group")
2559

    
2560
    return map(itertools.cycle,
2561
               [sorted(map(operator.attrgetter("name"), names))
2562
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2563
                                                  keyfunc)])
2564

    
2565
  @classmethod
2566
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2567
    """Choose which nodes should talk to which other nodes.
2568

2569
    We will make nodes contact all nodes in their group, and one node from
2570
    every other group.
2571

2572
    @warning: This algorithm has a known issue if one node group is much
2573
      smaller than others (e.g. just one node). In such a case all other
2574
      nodes will talk to the single node.
2575

2576
    """
2577
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2578
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2579

    
2580
    return (online_nodes,
2581
            dict((name, sorted([i.next() for i in sel]))
2582
                 for name in online_nodes))
2583

    
2584
  def BuildHooksEnv(self):
2585
    """Build hooks env.
2586

2587
    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.
2589

2590
    """
2591
    env = {
2592
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2593
      }
2594

    
2595
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2596
               for node in self.my_node_info.values())
2597

    
2598
    return env
2599

    
2600
  def BuildHooksNodes(self):
2601
    """Build hooks nodes.
2602

2603
    """
2604
    return ([], self.my_node_names)
2605

    
2606
  def Exec(self, feedback_fn):
2607
    """Verify integrity of the node group, performing various test on nodes.
2608

2609
    """
2610
    # This method has too many local variables. pylint: disable=R0914
2611
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2612

    
2613
    if not self.my_node_names:
2614
      # empty node group
2615
      feedback_fn("* Empty node group, skipping verification")
2616
      return True
2617

    
2618
    self.bad = False
2619
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2620
    verbose = self.op.verbose
2621
    self._feedback_fn = feedback_fn
2622

    
2623
    vg_name = self.cfg.GetVGName()
2624
    drbd_helper = self.cfg.GetDRBDHelper()
2625
    cluster = self.cfg.GetClusterInfo()
2626
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2627
    hypervisors = cluster.enabled_hypervisors
2628
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2629

    
2630
    i_non_redundant = [] # Non redundant instances
2631
    i_non_a_balanced = [] # Non auto-balanced instances
2632
    n_offline = 0 # Count of offline nodes
2633
    n_drained = 0 # Count of nodes being drained
2634
    node_vol_should = {}
2635

    
2636
    # FIXME: verify OS list
2637

    
2638
    # File verification
2639
    filemap = _ComputeAncillaryFiles(cluster, False)
2640

    
2641
    # do local checksums
2642
    master_node = self.master_node = self.cfg.GetMasterNode()
2643
    master_ip = self.cfg.GetMasterIP()
2644

    
2645
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2646

    
2647
    node_verify_param = {
2648
      constants.NV_FILELIST:
2649
        utils.UniqueSequence(filename
2650
                             for files in filemap
2651
                             for filename in files),
2652
      constants.NV_NODELIST:
2653
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2654
                                  self.all_node_info.values()),
2655
      constants.NV_HYPERVISOR: hypervisors,
2656
      constants.NV_HVPARAMS:
2657
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2658
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2659
                                 for node in node_data_list
2660
                                 if not node.offline],
2661
      constants.NV_INSTANCELIST: hypervisors,
2662
      constants.NV_VERSION: None,
2663
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2664
      constants.NV_NODESETUP: None,
2665
      constants.NV_TIME: None,
2666
      constants.NV_MASTERIP: (master_node, master_ip),
2667
      constants.NV_OSLIST: None,
2668
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2669
      }
2670

    
2671
    if vg_name is not None:
2672
      node_verify_param[constants.NV_VGLIST] = None
2673
      node_verify_param[constants.NV_LVLIST] = vg_name
2674
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2675
      node_verify_param[constants.NV_DRBDLIST] = None
2676

    
2677
    if drbd_helper:
2678
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2679

    
2680
    # bridge checks
2681
    # FIXME: this needs to be changed per node-group, not cluster-wide
2682
    bridges = set()
2683
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2684
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2685
      bridges.add(default_nicpp[constants.NIC_LINK])
2686
    for instance in self.my_inst_info.values():
2687
      for nic in instance.nics:
2688
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2689
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2690
          bridges.add(full_nic[constants.NIC_LINK])
2691

    
2692
    if bridges:
2693
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2694

    
2695
    # Build our expected cluster state
2696
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2697
                                                 name=node.name,
2698
                                                 vm_capable=node.vm_capable))
2699
                      for node in node_data_list)
2700

    
2701
    # Gather OOB paths
2702
    oob_paths = []
2703
    for node in self.all_node_info.values():
2704
      path = _SupportsOob(self.cfg, node)
2705
      if path and path not in oob_paths:
2706
        oob_paths.append(path)
2707

    
2708
    if oob_paths:
2709
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2710

    
2711
    for instance in self.my_inst_names:
2712
      inst_config = self.my_inst_info[instance]
2713

    
2714
      for nname in inst_config.all_nodes:
2715
        if nname not in node_image:
2716
          gnode = self.NodeImage(name=nname)
2717
          gnode.ghost = (nname not in self.all_node_info)
2718
          node_image[nname] = gnode
2719

    
2720
      inst_config.MapLVsByNode(node_vol_should)
2721

    
2722
      pnode = inst_config.primary_node
2723
      node_image[pnode].pinst.append(instance)
2724

    
2725
      for snode in inst_config.secondary_nodes:
2726
        nimg = node_image[snode]
2727
        nimg.sinst.append(instance)
2728
        if pnode not in nimg.sbp:
2729
          nimg.sbp[pnode] = []
2730
        nimg.sbp[pnode].append(instance)
2731

    
2732
    # At this point, we have the in-memory data structures complete,
2733
    # except for the runtime information, which we'll gather next
2734

    
2735
    # Due to the way our RPC system works, exact response times cannot be
2736
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2737
    # time before and after executing the request, we can at least have a time
2738
    # window.
2739
    nvinfo_starttime = time.time()
2740
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2741
                                           node_verify_param,
2742
                                           self.cfg.GetClusterName())
2743
    nvinfo_endtime = time.time()
2744

    
2745
    if self.extra_lv_nodes and vg_name is not None:
2746
      extra_lv_nvinfo = \
2747
          self.rpc.call_node_verify(self.extra_lv_nodes,
2748
                                    {constants.NV_LVLIST: vg_name},
2749
                                    self.cfg.GetClusterName())
2750
    else:
2751
      extra_lv_nvinfo = {}
2752

    
2753
    all_drbd_map = self.cfg.ComputeDRBDMap()
2754

    
2755
    feedback_fn("* Gathering disk information (%s nodes)" %
2756
                len(self.my_node_names))
2757
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2758
                                     self.my_inst_info)
2759

    
2760
    feedback_fn("* Verifying configuration file consistency")
2761

    
2762
    # If not all nodes are being checked, we need to make sure the master node
2763
    # and a non-checked vm_capable node are in the list.
2764
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2765
    if absent_nodes:
2766
      vf_nvinfo = all_nvinfo.copy()
2767
      vf_node_info = list(self.my_node_info.values())
2768
      additional_nodes = []
2769
      if master_node not in self.my_node_info:
2770
        additional_nodes.append(master_node)
2771
        vf_node_info.append(self.all_node_info[master_node])
2772
      # Add the first vm_capable node we find which is not included
2773
      for node in absent_nodes:
2774
        nodeinfo = self.all_node_info[node]
2775
        if nodeinfo.vm_capable and not nodeinfo.offline:
2776
          additional_nodes.append(node)
2777
          vf_node_info.append(self.all_node_info[node])
2778
          break
2779
      key = constants.NV_FILELIST
2780
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2781
                                                 {key: node_verify_param[key]},
2782
                                                 self.cfg.GetClusterName()))
2783
    else:
2784
      vf_nvinfo = all_nvinfo
2785
      vf_node_info = self.my_node_info.values()
2786

    
2787
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2788

    
2789
    feedback_fn("* Verifying node status")
2790

    
2791
    refos_img = None
2792

    
2793
    for node_i in node_data_list:
2794
      node = node_i.name
2795
      nimg = node_image[node]
2796

    
2797
      if node_i.offline:
2798
        if verbose:
2799
          feedback_fn("* Skipping offline node %s" % (node,))
2800
        n_offline += 1
2801
        continue
2802

    
2803
      if node == master_node:
2804
        ntype = "master"
2805
      elif node_i.master_candidate:
2806
        ntype = "master candidate"
2807
      elif node_i.drained:
2808
        ntype = "drained"
2809
        n_drained += 1
2810
      else:
2811
        ntype = "regular"
2812
      if verbose:
2813
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2814

    
2815
      msg = all_nvinfo[node].fail_msg
2816
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2817
               msg)
2818
      if msg:
2819
        nimg.rpc_fail = True
2820
        continue
2821

    
2822
      nresult = all_nvinfo[node].payload
2823

    
2824
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2825
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2826
      self._VerifyNodeNetwork(node_i, nresult)
2827
      self._VerifyOob(node_i, nresult)
2828

    
2829
      if nimg.vm_capable:
2830
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2831
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2832
                             all_drbd_map)
2833

    
2834
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2835
        self._UpdateNodeInstances(node_i, nresult, nimg)
2836
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2837
        self._UpdateNodeOS(node_i, nresult, nimg)
2838

    
2839
        if not nimg.os_fail:
2840
          if refos_img is None:
2841
            refos_img = nimg
2842
          self._VerifyNodeOS(node_i, nimg, refos_img)
2843
        self._VerifyNodeBridges(node_i, nresult, bridges)
2844

    
2845
        # Check whether all running instances are primary for the node. (This
2846
        # can no longer be done from _VerifyInstance below, since some of the
2847
        # wrong instances could be from other node groups.)
2848
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2849

    
2850
        for inst in non_primary_inst:
2851
          test = inst in self.all_inst_info
2852
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2853
                   "instance should not run on node %s", node_i.name)
2854
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2855
                   "node is running unknown instance %s", inst)
2856

    
2857
    for node, result in extra_lv_nvinfo.items():
2858
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2859
                              node_image[node], vg_name)
2860

    
2861
    feedback_fn("* Verifying instance status")
2862
    for instance in self.my_inst_names:
2863
      if verbose:
2864
        feedback_fn("* Verifying instance %s" % instance)
2865
      inst_config = self.my_inst_info[instance]
2866
      self._VerifyInstance(instance, inst_config, node_image,
2867
                           instdisk[instance])
2868
      inst_nodes_offline = []
2869

    
2870
      pnode = inst_config.primary_node
2871
      pnode_img = node_image[pnode]
2872
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2873
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
2874
               " primary node failed", instance)
2875

    
2876
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2877
               constants.CV_EINSTANCEBADNODE, instance,
2878
               "instance is marked as running and lives on offline node %s",
2879
               inst_config.primary_node)
2880

    
2881
      # If the instance is non-redundant we cannot survive losing its primary
2882
      # node, so we are not N+1 compliant. On the other hand we have no disk
2883
      # templates with more than one secondary so that situation is not well
2884
      # supported either.
2885
      # FIXME: does not support file-backed instances
2886
      if not inst_config.secondary_nodes:
2887
        i_non_redundant.append(instance)
2888

    
2889
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
2890
               constants.CV_EINSTANCELAYOUT,
2891
               instance, "instance has multiple secondary nodes: %s",
2892
               utils.CommaJoin(inst_config.secondary_nodes),
2893
               code=self.ETYPE_WARNING)
2894

    
2895
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2896
        pnode = inst_config.primary_node
2897
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2898
        instance_groups = {}
2899

    
2900
        for node in instance_nodes:
2901
          instance_groups.setdefault(self.all_node_info[node].group,
2902
                                     []).append(node)
2903

    
2904
        pretty_list = [
2905
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2906
          # Sort so that we always list the primary node first.
2907
          for group, nodes in sorted(instance_groups.items(),
2908
                                     key=lambda (_, nodes): pnode in nodes,
2909
                                     reverse=True)]
2910

    
2911
        self._ErrorIf(len(instance_groups) > 1,
2912
                      constants.CV_EINSTANCESPLITGROUPS,
2913
                      instance, "instance has primary and secondary nodes in"
2914
                      " different groups: %s", utils.CommaJoin(pretty_list),
2915
                      code=self.ETYPE_WARNING)
2916

    
2917
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2918
        i_non_a_balanced.append(instance)
2919

    
2920
      for snode in inst_config.secondary_nodes:
2921
        s_img = node_image[snode]
2922
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2923
                 snode, "instance %s, connection to secondary node failed",
2924
                 instance)
2925

    
2926
        if s_img.offline:
2927
          inst_nodes_offline.append(snode)
2928

    
2929
      # warn that the instance lives on offline nodes
2930
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2931
               "instance has offline secondary node(s) %s",
2932
               utils.CommaJoin(inst_nodes_offline))
2933
      # ... or ghost/non-vm_capable nodes
2934
      for node in inst_config.all_nodes:
2935
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2936
                 instance, "instance lives on ghost node %s", node)
2937
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2938
                 instance, "instance lives on non-vm_capable node %s", node)
2939

    
2940
    feedback_fn("* Verifying orphan volumes")
2941
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2942

    
2943
    # We will get spurious "unknown volume" warnings if any node of this group
2944
    # is secondary for an instance whose primary is in another group. To avoid
2945
    # them, we find these instances and add their volumes to node_vol_should.
2946
    for inst in self.all_inst_info.values():
2947
      for secondary in inst.secondary_nodes:
2948
        if (secondary in self.my_node_info
2949
            and inst.name not in self.my_inst_info):
2950
          inst.MapLVsByNode(node_vol_should)
2951
          break
2952

    
2953
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2954

    
2955
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2956
      feedback_fn("* Verifying N+1 Memory redundancy")
2957
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2958

    
2959
    feedback_fn("* Other Notes")
2960
    if i_non_redundant:
2961
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2962
                  % len(i_non_redundant))
2963

    
2964
    if i_non_a_balanced:
2965
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2966
                  % len(i_non_a_balanced))
2967

    
2968
    if n_offline:
2969
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2970

    
2971
    if n_drained:
2972
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2973

    
2974
    return not self.bad
2975

    
2976
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2977
    """Analyze the post-hooks' result
2978

2979
    This method analyses the hook result, handles it, and sends some
2980
    nicely-formatted feedback back to the user.
2981

2982
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2983
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2984
    @param hooks_results: the results of the multi-node hooks rpc call
2985
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
2987
    @return: the new Exec result, based on the previous result
2988
        and hook results
2989

2990
    """
2991
    # We only really run POST phase hooks, only for non-empty groups,
2992
    # and are only interested in their results
2993
    if not self.my_node_names:
2994
      # empty node group
2995
      pass
2996
    elif phase == constants.HOOKS_PHASE_POST:
2997
      # Used to change hooks' output to proper indentation
2998
      feedback_fn("* Hooks Results")
2999
      assert hooks_results, "invalid result from hooks"
3000

    
3001
      for node_name in hooks_results:
3002
        res = hooks_results[node_name]
3003
        msg = res.fail_msg
3004
        test = msg and not res.offline
3005
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3006
                      "Communication failure in hooks execution: %s", msg)
3007
        if res.offline or msg:
3008
          # No need to investigate payload if node is offline or gave
3009
          # an error.
3010
          continue
3011
        for script, hkr, output in res.payload:
3012
          test = hkr == constants.HKR_FAIL
3013
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3014
                        "Script %s failed, output:", script)
3015
          if test:
3016
            output = self._HOOKS_INDENT_RE.sub("      ", output)
3017
            feedback_fn("%s" % output)
3018
            lu_result = False
3019

    
3020
    return lu_result
3021

    
3022

    
3023
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])
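# Hedged illustration, not part of the original module: ResultWithJobs expects
# a list of jobs, where each job is itself a list of opcodes. The unused helper
# below restates the comprehension above as a standalone sketch so the
# [[opcode], [opcode], ...] nesting is explicit.
def _ExamplePerGroupVerifyDisksJobs(group_names):
  """Sketch only: one single-opcode OpGroupVerifyDisks job per node group."""
  return [[opcodes.OpGroupVerifyDisks(group_name=group)]
          for group in group_names]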

    
3042

    
3043
class LUGroupVerifyDisks(NoHooksLU):
3044
  """Verifies the status of all disks in a node group.
3045

3046
  """
3047
  REQ_BGL = False
3048

    
3049
  def ExpandNames(self):
3050
    # Raises errors.OpPrereqError on its own if group can't be found
3051
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3052

    
3053
    self.share_locks = _ShareAll()
3054
    self.needed_locks = {
3055
      locking.LEVEL_INSTANCE: [],
3056
      locking.LEVEL_NODEGROUP: [],
3057
      locking.LEVEL_NODE: [],
3058
      }
3059

    
3060
  def DeclareLocks(self, level):
3061
    if level == locking.LEVEL_INSTANCE:
3062
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3063

    
3064
      # Lock instances optimistically, needs verification once node and group
3065
      # locks have been acquired
3066
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3067
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3068

    
3069
    elif level == locking.LEVEL_NODEGROUP:
3070
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3071

    
3072
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3073
        set([self.group_uuid] +
3074
            # Lock all groups used by instances optimistically; this requires
3075
            # going via the node before it's locked, requiring verification
3076
            # later on
3077
            [group_uuid
3078
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3079
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3080

    
3081
    elif level == locking.LEVEL_NODE:
3082
      # This will only lock the nodes in the group to be verified which contain
3083
      # actual instances
3084
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3085
      self._LockInstancesNodes()
3086

    
3087
      # Lock all nodes in group to be verified
3088
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3089
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3090
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3091

    
3092
  def CheckPrereq(self):
3093
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3094
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3095
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3096

    
3097
    assert self.group_uuid in owned_groups
3098

    
3099
    # Check if locked instances are still correct
3100
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3101

    
3102
    # Get instance information
3103
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3104

    
3105
    # Check if node groups for locked instances are still correct
3106
    for (instance_name, inst) in self.instances.items():
3107
      assert owned_nodes.issuperset(inst.all_nodes), \
3108
        "Instance %s's nodes changed while we kept the lock" % instance_name
3109

    
3110
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3111
                                             owned_groups)
3112

    
3113
      assert self.group_uuid in inst_groups, \
3114
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3115

    
3116
  def Exec(self, feedback_fn):
3117
    """Verify integrity of cluster disks.
3118

3119
    @rtype: tuple of three items
3120
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

3124
    """
3125
    res_nodes = {}
3126
    res_instances = set()
3127
    res_missing = {}
3128

    
3129
    nv_dict = _MapInstanceDisksToNodes([inst
3130
                                        for inst in self.instances.values()
3131
                                        if inst.admin_up])
3132

    
3133
    if nv_dict:
3134
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3135
                             set(self.cfg.GetVmCapableNodeList()))
3136

    
3137
      node_lvs = self.rpc.call_lv_list(nodes, [])
3138

    
3139
      for (node, node_res) in node_lvs.items():
3140
        if node_res.offline:
3141
          continue
3142

    
3143
        msg = node_res.fail_msg
3144
        if msg:
3145
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3146
          res_nodes[node] = msg
3147
          continue
3148

    
3149
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3150
          inst = nv_dict.pop((node, lv_name), None)
3151
          if not (lv_online or inst is None):
3152
            res_instances.add(inst)
3153

    
3154
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3155
      # better
3156
      for key, inst in nv_dict.iteritems():
3157
        res_missing.setdefault(inst, []).append(list(key))
3158

    
3159
    return (res_nodes, list(res_instances), res_missing)
3160

    
3161

    
3162
class LUClusterRepairDiskSizes(NoHooksLU):
3163
  """Verifies the cluster disks sizes.
3164

3165
  """
3166
  REQ_BGL = False
3167

    
3168
  def ExpandNames(self):
3169
    if self.op.instances:
3170
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3171
      self.needed_locks = {
3172
        locking.LEVEL_NODE: [],
3173
        locking.LEVEL_INSTANCE: self.wanted_names,
3174
        }
3175
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3176
    else:
3177
      self.wanted_names = None
3178
      self.needed_locks = {
3179
        locking.LEVEL_NODE: locking.ALL_SET,
3180
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3181
        }
3182
    self.share_locks = _ShareAll()
3183

    
3184
  def DeclareLocks(self, level):
3185
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3186
      self._LockInstancesNodes(primary_only=True)
3187

    
3188
  def CheckPrereq(self):
3189
    """Check prerequisites.
3190

3191
    This only checks the optional instance list against the existing names.
3192

3193
    """
3194
    if self.wanted_names is None:
3195
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3196

    
3197
    self.wanted_instances = \
3198
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3199

    
3200
  def _EnsureChildSizes(self, disk):
3201
    """Ensure children of the disk have the needed disk size.
3202

3203
    This is valid mainly for DRBD8 and fixes an issue where the
    children have a smaller disk size than the parent.

3206
    @param disk: an L{ganeti.objects.Disk} object
3207

3208
    """
3209
    if disk.dev_type == constants.LD_DRBD8:
3210
      assert disk.children, "Empty children for DRBD8?"
3211
      fchild = disk.children[0]
3212
      mismatch = fchild.size < disk.size
3213
      if mismatch:
3214
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3215
                     fchild.size, disk.size)
3216
        fchild.size = disk.size
3217

    
3218
      # and we recurse on this child only, not on the metadev
3219
      return self._EnsureChildSizes(fchild) or mismatch
3220
    else:
3221
      return False
3222

    
3223
  def Exec(self, feedback_fn):
3224
    """Verify the size of cluster disks.
3225

3226
    """
3227
    # TODO: check child disks too
3228
    # TODO: check differences in size between primary/secondary nodes
3229
    per_node_disks = {}
3230
    for instance in self.wanted_instances:
3231
      pnode = instance.primary_node
3232
      if pnode not in per_node_disks:
3233
        per_node_disks[pnode] = []
3234
      for idx, disk in enumerate(instance.disks):
3235
        per_node_disks[pnode].append((instance, idx, disk))
3236

    
3237
    changed = []
3238
    for node, dskl in per_node_disks.items():
3239
      newl = [v[2].Copy() for v in dskl]
3240
      for dsk in newl:
3241
        self.cfg.SetDiskID(dsk, node)
3242
      result = self.rpc.call_blockdev_getsize(node, newl)
3243
      if result.fail_msg:
3244
        self.LogWarning("Failure in blockdev_getsize call to node"
3245
                        " %s, ignoring", node)
3246
        continue
3247
      if len(result.payload) != len(dskl):
3248
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
3250
        self.LogWarning("Invalid result from node %s, ignoring node results",
3251
                        node)
3252
        continue
3253
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3254
        if size is None:
3255
          self.LogWarning("Disk %d of instance %s did not return size"
3256
                          " information, ignoring", idx, instance.name)
3257
          continue
3258
        if not isinstance(size, (int, long)):
3259
          self.LogWarning("Disk %d of instance %s did not return valid"
3260
                          " size information, ignoring", idx, instance.name)
3261
          continue
3262
        size = size >> 20
3263
        if size != disk.size:
3264
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3265
                       " correcting: recorded %d, actual %d", idx,
3266
                       instance.name, disk.size, size)
3267
          disk.size = size
3268
          self.cfg.Update(instance, feedback_fn)
3269
          changed.append((instance.name, idx, size))
3270
        if self._EnsureChildSizes(disk):
3271
          self.cfg.Update(instance, feedback_fn)
3272
          changed.append((instance.name, idx, disk.size))
3273
    return changed
3274

    
3275

    
3276
class LUClusterRename(LogicalUnit):
3277
  """Rename the cluster.
3278

3279
  """
3280
  HPATH = "cluster-rename"
3281
  HTYPE = constants.HTYPE_CLUSTER
3282

    
3283
  def BuildHooksEnv(self):
3284
    """Build hooks env.
3285

3286
    """
3287
    return {
3288
      "OP_TARGET": self.cfg.GetClusterName(),
3289
      "NEW_NAME": self.op.name,
3290
      }
3291

    
3292
  def BuildHooksNodes(self):
3293
    """Build hooks nodes.
3294

3295
    """
3296
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3297

    
3298
  def CheckPrereq(self):
3299
    """Verify that the passed name is a valid one.
3300

3301
    """
3302
    hostname = netutils.GetHostname(name=self.op.name,
3303
                                    family=self.cfg.GetPrimaryIPFamily())
3304

    
3305
    new_name = hostname.name
3306
    self.ip = new_ip = hostname.ip
3307
    old_name = self.cfg.GetClusterName()
3308
    old_ip = self.cfg.GetMasterIP()
3309
    if new_name == old_name and new_ip == old_ip:
3310
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3311
                                 " cluster has changed",
3312
                                 errors.ECODE_INVAL)
3313
    if new_ip != old_ip:
3314
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3315
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3316
                                   " reachable on the network" %
3317
                                   new_ip, errors.ECODE_NOTUNIQUE)
3318

    
3319
    self.op.name = new_name
3320

    
3321
  def Exec(self, feedback_fn):
3322
    """Rename the cluster.
3323

3324
    """
3325
    clustername = self.op.name
3326
    new_ip = self.ip
3327

    
3328
    # shutdown the master IP
3329
    (master, ip, dev, netmask, family) = self.cfg.GetMasterNetworkParameters()
3330
    result = self.rpc.call_node_deactivate_master_ip(master, ip, netmask, dev)
3331
    result.Raise("Could not disable the master role")
3332

    
3333
    try:
3334
      cluster = self.cfg.GetClusterInfo()
3335
      cluster.cluster_name = clustername
3336
      cluster.master_ip = new_ip
3337
      self.cfg.Update(cluster, feedback_fn)
3338

    
3339
      # update the known hosts file
3340
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3341
      node_list = self.cfg.GetOnlineNodeList()
3342
      try:
3343
        node_list.remove(master)
3344
      except ValueError:
3345
        pass
3346
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3347
    finally:
3348
      result = self.rpc.call_node_activate_master_ip(master, new_ip, netmask,
3349
                                                     dev, family)
3350
      msg = result.fail_msg
3351
      if msg:
3352
        self.LogWarning("Could not re-enable the master role on"
3353
                        " the master, please restart manually: %s", msg)
3354

    
3355
    return clustername
3356
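# Hedged illustration, not part of the original module: the reachability
# precheck used by LUClusterRename.CheckPrereq above, shown standalone.
# netutils.TcpPing and constants.DEFAULT_NODED_PORT are the same names used
# there; the helper itself is unused.
def _ExampleNewMasterIpIsFree(new_ip):
  """Sketch only: True if nothing answers on the candidate master IP."""
  return not netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT)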

    
3357

    
3358
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                                (netmask))
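# Hedged usage sketch, not part of the original code: the same check shown
# standalone. "cfg" is assumed to be the usual config.ConfigWriter instance
# providing GetPrimaryIPFamily(); the helper is illustrative and unused.
def _ExampleNetmaskIsValid(cfg, netmask):
  """Sketch only: True if the CIDR prefix length fits the cluster IP family."""
  ipcls = netutils.IPAddress.GetClassFromIpFamily(cfg.GetPrimaryIPFamily())
  return ipcls.ValidateNetmask(netmask)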

    
3378

    
3379
class LUClusterSetParams(LogicalUnit):
3380
  """Change the parameters of the cluster.
3381

3382
  """
3383
  HPATH = "cluster-modify"
3384
  HTYPE = constants.HTYPE_CLUSTER
3385
  REQ_BGL = False
3386

    
3387
  def CheckArguments(self):
3388
    """Check parameters
3389

3390
    """
3391
    if self.op.uid_pool:
3392
      uidpool.CheckUidPool(self.op.uid_pool)
3393

    
3394
    if self.op.add_uids:
3395
      uidpool.CheckUidPool(self.op.add_uids)
3396

    
3397
    if self.op.remove_uids:
3398
      uidpool.CheckUidPool(self.op.remove_uids)
3399

    
3400
    if self.op.master_netmask is not None:
3401
      _ValidateNetmask(self.cfg, self.op.master_netmask)
3402

    
3403
  def ExpandNames(self):
3404
    # FIXME: in the future maybe other cluster params won't require checking on
3405
    # all nodes to be modified.
3406
    self.needed_locks = {
3407
      locking.LEVEL_NODE: locking.ALL_SET,
3408
    }
3409
    self.share_locks[locking.LEVEL_NODE] = 1
3410

    
3411
  def BuildHooksEnv(self):
3412
    """Build hooks env.
3413

3414
    """
3415
    return {
3416
      "OP_TARGET": self.cfg.GetClusterName(),
3417
      "NEW_VG_NAME": self.op.vg_name,
3418
      }
3419

    
3420
  def BuildHooksNodes(self):
3421
    """Build hooks nodes.
3422

3423
    """
3424
    mn = self.cfg.GetMasterNode()
3425
    return ([mn], [mn])
3426

    
3427
  def CheckPrereq(self):
3428
    """Check prerequisites.
3429

3430
    This checks whether the given params don't conflict and
3431
    if the given volume group is valid.
3432

3433
    """
3434
    if self.op.vg_name is not None and not self.op.vg_name:
3435
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3436
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3437
                                   " instances exist", errors.ECODE_INVAL)
3438

    
3439
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3440
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3441
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3442
                                   " drbd-based instances exist",
3443
                                   errors.ECODE_INVAL)
3444

    
3445
    node_list = self.owned_locks(locking.LEVEL_NODE)
3446

    
3447
    # if vg_name not None, checks given volume group on all nodes
3448
    if self.op.vg_name:
3449
      vglist = self.rpc.call_vg_list(node_list)
3450
      for node in node_list:
3451
        msg = vglist[node].fail_msg
3452
        if msg:
3453
          # ignoring down node
3454
          self.LogWarning("Error while gathering data on node %s"
3455
                          " (ignoring node): %s", node, msg)
3456
          continue
3457
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3458
                                              self.op.vg_name,
3459
                                              constants.MIN_VG_SIZE)
3460
        if vgstatus:
3461
          raise errors.OpPrereqError("Error on node '%s': %s" %
3462
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3463

    
3464
    if self.op.drbd_helper:
3465
      # checks given drbd helper on all nodes
3466
      helpers = self.rpc.call_drbd_helper(node_list)
3467
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3468
        if ninfo.offline:
3469
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3470
          continue
3471
        msg = helpers[node].fail_msg
3472
        if msg:
3473
          raise errors.OpPrereqError("Error checking drbd helper on node"
3474
                                     " '%s': %s" % (node, msg),
3475
                                     errors.ECODE_ENVIRON)
3476
        node_helper = helpers[node].payload
3477
        if node_helper != self.op.drbd_helper:
3478
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3479
                                     (node, node_helper), errors.ECODE_ENVIRON)
3480

    
3481
    self.cluster = cluster = self.cfg.GetClusterInfo()
3482
    # validate params changes
3483
    if self.op.beparams:
3484
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3485
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3486

    
3487
    if self.op.ndparams:
3488
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3489
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3490

    
3491
      # TODO: we need a more general way to handle resetting
3492
      # cluster-level parameters to default values
3493
      if self.new_ndparams["oob_program"] == "":
3494
        self.new_ndparams["oob_program"] = \
3495
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3496

    
3497
    if self.op.nicparams:
3498
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3499
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3500
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3501
      nic_errors = []
3502

    
3503
      # check all instances for consistency
3504
      for instance in self.cfg.GetAllInstancesInfo().values():
3505
        for nic_idx, nic in enumerate(instance.nics):
3506
          params_copy = copy.deepcopy(nic.nicparams)
3507
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3508

    
3509
          # check parameter syntax
3510
          try:
3511
            objects.NIC.CheckParameterSyntax(params_filled)
3512
          except errors.ConfigurationError, err:
3513
            nic_errors.append("Instance %s, nic/%d: %s" %
3514
                              (instance.name, nic_idx, err))
3515

    
3516
          # if we're moving instances to routed, check that they have an ip
3517
          target_mode = params_filled[constants.NIC_MODE]
3518
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3519
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3520
                              " address" % (instance.name, nic_idx))
3521
      if nic_errors:
3522
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3523
                                   "\n".join(nic_errors))
3524

    
3525
    # hypervisor list/parameters
3526
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3527
    if self.op.hvparams:
3528
      for hv_name, hv_dict in self.op.hvparams.items():
3529
        if hv_name not in self.new_hvparams:
3530
          self.new_hvparams[hv_name] = hv_dict
3531
        else:
3532
          self.new_hvparams[hv_name].update(hv_dict)
3533

    
3534
    # os hypervisor parameters
3535
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3536
    if self.op.os_hvp:
3537
      for os_name, hvs in self.op.os_hvp.items():
3538
        if os_name not in self.new_os_hvp:
3539
          self.new_os_hvp[os_name] = hvs
3540
        else:
3541
          for hv_name, hv_dict in hvs.items():
3542
            if hv_name not in self.new_os_hvp[os_name]:
3543
              self.new_os_hvp[os_name][hv_name] = hv_dict
3544
            else:
3545
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3546

    
3547
    # os parameters
3548
    self.new_osp = objects.FillDict(cluster.osparams, {})
3549
    if self.op.osparams:
3550
      for os_name, osp in self.op.osparams.items():
3551
        if os_name not in self.new_osp:
3552
          self.new_osp[os_name] = {}
3553

    
3554
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3555
                                                  use_none=True)
3556

    
3557
        if not self.new_osp[os_name]:
3558
          # we removed all parameters
3559
          del self.new_osp[os_name]
3560
        else:
3561
          # check the parameter validity (remote check)
3562
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3563
                         os_name, self.new_osp[os_name])
3564

    
3565
    # changes to the hypervisor list
3566
    if self.op.enabled_hypervisors is not None:
3567
      self.hv_list = self.op.enabled_hypervisors
3568
      for hv in self.hv_list:
3569
        # if the hypervisor doesn't already exist in the cluster
3570
        # hvparams, we initialize it to empty, and then (in both
3571
        # cases) we make sure to fill the defaults, as we might not
3572
        # have a complete defaults list if the hypervisor wasn't
3573
        # enabled before
3574
        if hv not in new_hvp:
3575
          new_hvp[hv] = {}
3576
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3577
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3578
    else:
3579
      self.hv_list = cluster.enabled_hypervisors
3580

    
3581
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3582
      # either the enabled list has changed, or the parameters have, validate
3583
      for hv_name, hv_params in self.new_hvparams.items():
3584
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3585
            (self.op.enabled_hypervisors and
3586
             hv_name in self.op.enabled_hypervisors)):
3587
          # either this is a new hypervisor, or its parameters have changed
3588
          hv_class = hypervisor.GetHypervisor(hv_name)
3589
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3590
          hv_class.CheckParameterSyntax(hv_params)
3591
          _CheckHVParams(self, node_list, hv_name, hv_params)
3592

    
3593
    if self.op.os_hvp:
3594
      # no need to check any newly-enabled hypervisors, since the
3595
      # defaults have already been checked in the above code-block
3596
      for os_name, os_hvp in self.new_os_hvp.items():
3597
        for hv_name, hv_params in os_hvp.items():
3598
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3599
          # we need to fill in the new os_hvp on top of the actual hv_p
3600
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3601
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3602
          hv_class = hypervisor.GetHypervisor(hv_name)
3603
          hv_class.CheckParameterSyntax(new_osp)
3604
          _CheckHVParams(self, node_list, hv_name, new_osp)
3605

    
3606
    if self.op.default_iallocator:
3607
      alloc_script = utils.FindFile(self.op.default_iallocator,
3608
                                    constants.IALLOCATOR_SEARCH_PATH,
3609
                                    os.path.isfile)
3610
      if alloc_script is None:
3611
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3612
                                   " specified" % self.op.default_iallocator,
3613
                                   errors.ECODE_INVAL)
3614

    
3615
  def Exec(self, feedback_fn):
3616
    """Change the parameters of the cluster.
3617

3618
    """
3619
    if self.op.vg_name is not None:
3620
      new_volume = self.op.vg_name
3621
      if not new_volume:
3622
        new_volume = None
3623
      if new_volume != self.cfg.GetVGName():
3624
        self.cfg.SetVGName(new_volume)
3625
      else:
3626
        feedback_fn("Cluster LVM configuration already in desired"
3627
                    " state, not changing")
3628
    if self.op.drbd_helper is not None:
3629
      new_helper = self.op.drbd_helper
3630
      if not new_helper:
3631
        new_helper = None
3632
      if new_helper != self.cfg.GetDRBDHelper():
3633
        self.cfg.SetDRBDHelper(new_helper)
3634
      else:
3635
        feedback_fn("Cluster DRBD helper already in desired state,"
3636
                    " not changing")
3637
    if self.op.hvparams:
3638
      self.cluster.hvparams = self.new_hvparams
3639
    if self.op.os_hvp:
3640
      self.cluster.os_hvp = self.new_os_hvp
3641
    if self.op.enabled_hypervisors is not None:
3642
      self.cluster.hvparams = self.new_hvparams
3643
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3644
    if self.op.beparams:
3645
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3646
    if self.op.nicparams:
3647
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3648
    if self.op.osparams:
3649
      self.cluster.osparams = self.new_osp
3650
    if self.op.ndparams:
3651
      self.cluster.ndparams = self.new_ndparams
3652

    
3653
    if self.op.candidate_pool_size is not None:
3654
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3655
      # we need to update the pool size here, otherwise the save will fail
3656
      _AdjustCandidatePool(self, [])
3657

    
3658
    if self.op.maintain_node_health is not None:
3659
      self.cluster.maintain_node_health = self.op.maintain_node_health
3660

    
3661
    if self.op.prealloc_wipe_disks is not None:
3662
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3663

    
3664
    if self.op.add_uids is not None:
3665
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3666

    
3667
    if self.op.remove_uids is not None:
3668
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3669

    
3670
    if self.op.uid_pool is not None:
3671
      self.cluster.uid_pool = self.op.uid_pool
3672

    
3673
    if self.op.default_iallocator is not None:
3674
      self.cluster.default_iallocator = self.op.default_iallocator
3675

    
3676
    if self.op.reserved_lvs is not None:
3677
      self.cluster.reserved_lvs = self.op.reserved_lvs
3678

    
3679
    def helper_os(aname, mods, desc):
3680
      desc += " OS list"
3681
      lst = getattr(self.cluster, aname)
3682
      for key, val in mods:
3683
        if key == constants.DDM_ADD:
3684
          if val in lst:
3685
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3686
          else:
3687
            lst.append(val)
3688
        elif key == constants.DDM_REMOVE:
3689
          if val in lst:
3690
            lst.remove(val)
3691
          else:
3692
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3693
        else:
3694
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3695

    
3696
    if self.op.hidden_os:
3697
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3698

    
3699
    if self.op.blacklisted_os:
3700
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3701

    
3702
    if self.op.master_netdev:
3703
      (master, ip, dev, netmask, _) = self.cfg.GetMasterNetworkParameters()
3704
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3705
                  self.cluster.master_netdev)
3706
      result = self.rpc.call_node_deactivate_master_ip(master, ip, netmask, dev)
3707
      result.Raise("Could not disable the master ip")
3708
      feedback_fn("Changing master_netdev from %s to %s" %
3709
                  (dev, self.op.master_netdev))
3710
      self.cluster.master_netdev = self.op.master_netdev
3711

    
3712
    if self.op.master_netmask:
3713
      master = self.cfg.GetMasterNode()
3714
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3715
      result = self.rpc.call_node_change_master_netmask(master,
3716
                                                        self.op.master_netmask)
3717
      if result.fail_msg:
3718
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
3719
        self.LogWarning(msg)
3720
        feedback_fn(msg)
3721
      else:
3722
        self.cluster.master_netmask = self.op.master_netmask
3723

    
3724
    self.cfg.Update(self.cluster, feedback_fn)
3725

    
3726
    if self.op.master_netdev:
3727
      (master, ip, dev, netmask, family) = self.cfg.GetMasterNetworkParameters()
3728
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3729
                  self.op.master_netdev)
3730
      result = self.rpc.call_node_activate_master_ip(master, ip, netmask, dev,
3731
                                                     family)
3732
      if result.fail_msg:
3733
        self.LogWarning("Could not re-enable the master ip on"
3734
                        " the master, please restart manually: %s",
3735
                        result.fail_msg)
3736

    
3737

    
3738
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)
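# Hedged illustration, not part of the original module: distribute a single
# ancillary file to all online nodes except the master, mirroring on a small
# scale what _RedistributeAncillaryFiles does below for whole file lists. The
# helper is unused and only sketches the calling convention.
def _ExampleUploadKnownHosts(lu):
  """Sketch only: push the known_hosts file to every non-master online node."""
  nodes = lu.cfg.GetOnlineNodeList()
  master = lu.cfg.GetMasterNode()
  if master in nodes:
    nodes.remove(master)
  _UploadHelper(lu, nodes, constants.SSH_KNOWN_HOSTS_FILE)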

    
3751

    
3752
def _ComputeAncillaryFiles(cluster, redist):
3753
  """Compute files external to Ganeti which need to be consistent.
3754

3755
  @type redist: boolean
3756
  @param redist: Whether to include files which need to be redistributed
3757

3758
  """
3759
  # Compute files for all nodes
3760
  files_all = set([
3761
    constants.SSH_KNOWN_HOSTS_FILE,
3762
    constants.CONFD_HMAC_KEY,
3763
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3764
    constants.SPICE_CERT_FILE,
3765
    constants.SPICE_CACERT_FILE,
3766
    constants.RAPI_USERS_FILE,
3767
    ])
3768

    
3769
  if not redist:
3770
    files_all.update(constants.ALL_CERT_FILES)
3771
    files_all.update(ssconf.SimpleStore().GetFileList())
3772
  else:
3773
    # we need to ship at least the RAPI certificate
3774
    files_all.add(constants.RAPI_CERT_FILE)
3775

    
3776
  if cluster.modify_etc_hosts:
3777
    files_all.add(constants.ETC_HOSTS)
3778

    
3779
  # Files which are optional; these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
3782
  files_opt = set([
3783
    constants.RAPI_USERS_FILE,
3784
    ])
3785

    
3786
  # Files which should only be on master candidates
3787
  files_mc = set()
3788
  if not redist:
3789
    files_mc.add(constants.CLUSTER_CONF_FILE)
3790

    
3791
  # Files which should only be on VM-capable nodes
3792
  files_vm = set(filename
3793
    for hv_name in cluster.enabled_hypervisors
3794
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3795

    
3796
  files_opt |= set(filename
3797
    for hv_name in cluster.enabled_hypervisors
3798
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3799

    
3800
  # Filenames in each category must be unique
3801
  all_files_set = files_all | files_mc | files_vm
3802
  assert (len(all_files_set) ==
3803
          sum(map(len, [files_all, files_mc, files_vm]))), \
3804
         "Found file listed in more than one file list"
3805

    
3806
  # Optional files must be present in one other category
3807
  assert all_files_set.issuperset(files_opt), \
3808
         "Optional file not in a different required list"
3809

    
3810
  return (files_all, files_opt, files_mc, files_vm)
3811
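# Hedged illustration, not part of the original code: how the four categories
# returned above are typically combined. "cluster" is assumed to be the usual
# objects.Cluster instance; the helper is unused and purely explanatory.
def _ExampleAllRedistributableFiles(cluster):
  """Sketch only: every filename a full redistribution could touch."""
  (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)
  return files_all | files_mc | files_vm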

    
3812

    
3813
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3814
  """Distribute additional files which are part of the cluster configuration.
3815

3816
  ConfigWriter takes care of distributing the config and ssconf files, but
3817
  there are more files which should be distributed to all nodes. This function
3818
  makes sure those are copied.
3819

3820
  @param lu: calling logical unit
3821
  @param additional_nodes: list of nodes not in the config to distribute to
3822
  @type additional_vm: boolean
3823
  @param additional_vm: whether the additional nodes are vm-capable or not
3824

3825
  """
3826
  # Gather target nodes
3827
  cluster = lu.cfg.GetClusterInfo()
3828
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3829

    
3830
  online_nodes = lu.cfg.GetOnlineNodeList()
3831
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3832

    
3833
  if additional_nodes is not None:
3834
    online_nodes.extend(additional_nodes)
3835
    if additional_vm:
3836
      vm_nodes.extend(additional_nodes)
3837

    
3838
  # Never distribute to master node
3839
  for nodelist in [online_nodes, vm_nodes]:
3840
    if master_info.name in nodelist:
3841
      nodelist.remove(master_info.name)
3842

    
3843
  # Gather file lists
3844
  (files_all, _, files_mc, files_vm) = \
3845
    _ComputeAncillaryFiles(cluster, True)
3846

    
3847
  # Never re-distribute configuration file from here
3848
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3849
              constants.CLUSTER_CONF_FILE in files_vm)
3850
  assert not files_mc, "Master candidates not handled in this function"
3851

    
3852
  filemap = [
3853
    (online_nodes, files_all),
3854
    (vm_nodes, files_vm),
3855
    ]
3856

    
3857
  # Upload the files
3858
  for (node_list, files) in filemap:
3859
    for fname in files:
3860
      _UploadHelper(lu, node_list, fname)
3861

    
3862

    
3863
class LUClusterRedistConf(NoHooksLU):
3864
  """Force the redistribution of cluster configuration.
3865

3866
  This is a very simple LU.
3867

3868
  """
3869
  REQ_BGL = False
3870

    
3871
  def ExpandNames(self):
3872
    self.needed_locks = {
3873
      locking.LEVEL_NODE: locking.ALL_SET,
3874
    }
3875
    self.share_locks[locking.LEVEL_NODE] = 1
3876

    
3877
  def Exec(self, feedback_fn):
3878
    """Redistribute the configuration.
3879

3880
    """
3881
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3882
    _RedistributeAncillaryFiles(self)
3883

    
3884

    
3885
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    (master, ip, dev, netmask, family) = self.cfg.GetMasterNetworkParameters()
    self.rpc.call_node_activate_master_ip(master, ip, netmask, dev, family)


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    (master, ip, dev, netmask, _) = self.cfg.GetMasterNetworkParameters()
    self.rpc.call_node_deactivate_master_ip(master, ip, netmask, dev)

    
3908

    
3909
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3910
  """Sleep and poll for an instance's disk to sync.
3911

3912
  """
3913
  if not instance.disks or disks is not None and not disks:
3914
    return True
3915

    
3916
  disks = _ExpandCheckDisks(instance, disks)
3917

    
3918
  if not oneshot:
3919
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3920

    
3921
  node = instance.primary_node
3922

    
3923
  for dev in disks:
3924
    lu.cfg.SetDiskID(dev, node)
3925

    
3926
  # TODO: Convert to utils.Retry
3927

    
3928
  retries = 0
3929
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3930
  while True:
3931
    max_time = 0
3932
    done = True
3933
    cumul_degraded = False
3934
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3935
    msg = rstats.fail_msg
3936
    if msg:
3937
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3938
      retries += 1
3939
      if retries >= 10:
3940
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3941
                                 " aborting." % node)
3942
      time.sleep(6)
3943
      continue
3944
    rstats = rstats.payload
3945
    retries = 0
3946
    for i, mstat in enumerate(rstats):
3947
      if mstat is None:
3948
        lu.LogWarning("Can't compute data for node %s/%s",
3949
                           node, disks[i].iv_name)
3950
        continue
3951

    
3952
      cumul_degraded = (cumul_degraded or
3953
                        (mstat.is_degraded and mstat.sync_percent is None))
3954
      if mstat.sync_percent is not None:
3955
        done = False
3956
        if mstat.estimated_time is not None:
3957
          rem_time = ("%s remaining (estimated)" %
3958
                      utils.FormatSeconds(mstat.estimated_time))
3959
          max_time = mstat.estimated_time
3960
        else:
3961
          rem_time = "no time estimate"
3962
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3963
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3964

    
3965
    # if we're done but degraded, let's do a few small retries, to
3966
    # make sure we see a stable and not transient situation; therefore
3967
    # we force restart of the loop
3968
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3969
      logging.info("Degraded disks found, %d retries left", degr_retries)
3970
      degr_retries -= 1
3971
      time.sleep(1)
3972
      continue
3973

    
3974
    if done or oneshot:
3975
      break
3976

    
3977
    time.sleep(min(60, max_time))
3978

    
3979
  if done:
3980
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3981
  return not cumul_degraded
3982
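# Hedged illustration, not part of the original module: callers that only need
# a yes/no degradation answer can use the one-shot mode of the helper above.
# The wrapper is unused and exists purely as a usage sketch.
def _ExampleInstanceDisksHealthy(lu, instance):
  """Sketch only: True when no disk of the instance reports degradation."""
  return _WaitForSync(lu, instance, oneshot=True)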

    
3983

    
3984
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3985
  """Check that mirrors are not degraded.
3986

3987
  The ldisk parameter, if True, will change the test from the
3988
  is_degraded attribute (which represents overall non-ok status for
3989
  the device(s)) to the ldisk (representing the local storage status).
3990

3991
  """
3992
  lu.cfg.SetDiskID(dev, node)
3993

    
3994
  result = True
3995

    
3996
  if on_primary or dev.AssembleOnSecondary():
3997
    rstats = lu.rpc.call_blockdev_find(node, dev)
3998
    msg = rstats.fail_msg
3999
    if msg:
4000
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4001
      result = False
4002
    elif not rstats.payload:
4003
      lu.LogWarning("Can't find disk on node %s", node)
4004
      result = False
4005
    else:
4006
      if ldisk:
4007
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4008
      else:
4009
        result = result and not rstats.payload.is_degraded
4010

    
4011
  if dev.children:
4012
    for child in dev.children:
4013
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4014

    
4015
  return result
4016
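# Hedged illustration, not part of the original code: check every disk of an
# instance on its primary node with the helper above. compat.all is already
# imported by this module; the wrapper itself is unused and only explanatory.
def _ExampleInstanceConsistentOnPrimary(lu, instance):
  """Sketch only: True if no disk of the instance is degraded on its primary."""
  return compat.all(_CheckDiskConsistency(lu, dev, instance.primary_node, True)
                    for dev in instance.disks)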

    
4017

    
4018
class LUOobCommand(NoHooksLU):
4019
  """Logical unit for OOB handling.
4020

4021
  """
4022
  REG_BGL = False
4023
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4024

    
4025
  def ExpandNames(self):
4026
    """Gather locks we need.
4027

4028
    """
4029
    if self.op.node_names:
4030
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4031
      lock_names = self.op.node_names
4032
    else:
4033
      lock_names = locking.ALL_SET
4034

    
4035
    self.needed_locks = {
4036
      locking.LEVEL_NODE: lock_names,
4037
      }
4038

    
4039
  def CheckPrereq(self):
4040
    """Check prerequisites.
4041

4042
    This checks:
4043
     - the node exists in the configuration
4044
     - OOB is supported
4045

4046
    Any errors are signaled by raising errors.OpPrereqError.
4047

4048
    """
4049
    self.nodes = []
4050
    self.master_node = self.cfg.GetMasterNode()
4051

    
4052
    assert self.op.power_delay >= 0.0
4053

    
4054
    if self.op.node_names:
4055
      if (self.op.command in self._SKIP_MASTER and
4056
          self.master_node in self.op.node_names):
4057
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4058
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4059

    
4060
        if master_oob_handler:
4061
          additional_text = ("run '%s %s %s' if you want to operate on the"
4062
                             " master regardless") % (master_oob_handler,
4063
                                                      self.op.command,
4064
                                                      self.master_node)
4065
        else:
4066
          additional_text = "it does not support out-of-band operations"
4067

    
4068
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4069
                                    " allowed for %s; %s") %
4070
                                   (self.master_node, self.op.command,
4071
                                    additional_text), errors.ECODE_INVAL)
4072
    else:
4073
      self.op.node_names = self.cfg.GetNodeList()
4074
      if self.op.command in self._SKIP_MASTER:
4075
        self.op.node_names.remove(self.master_node)
4076

    
4077
    if self.op.command in self._SKIP_MASTER:
4078
      assert self.master_node not in self.op.node_names
4079

    
4080
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4081
      if node is None:
4082
        raise errors.OpPrereqError("Node %s not found" % node_name,
4083
                                   errors.ECODE_NOENT)
4084
      else:
4085
        self.nodes.append(node)
4086

    
4087
      if (not self.op.ignore_status and
4088
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4089
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4090
                                    " not marked offline") % node_name,
4091
                                   errors.ECODE_STATE)
4092

    
4093
  def Exec(self, feedback_fn):
4094
    """Execute OOB and return result if we expect any.
4095

4096
    """
4097
    master_node = self.master_node
4098
    ret = []
4099

    
4100
    for idx, node in enumerate(utils.NiceSort(self.nodes,
4101
                                              key=lambda node: node.name)):
4102
      node_entry = [(constants.RS_NORMAL, node.name)]
4103
      ret.append(node_entry)
4104

    
4105
      oob_program = _SupportsOob(self.cfg, node)
4106

    
4107
      if not oob_program:
4108
        node_entry.append((constants.RS_UNAVAIL, None))
4109
        continue
4110

    
4111
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
4112
                   self.op.command, oob_program, node.name)
4113
      result = self.rpc.call_run_oob(master_node, oob_program,
4114
                                     self.op.command, node.name,
4115
                                     self.op.timeout)
4116

    
4117
      if result.fail_msg:
4118
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4119
                        node.name, result.fail_msg)
4120
        node_entry.append((constants.RS_NODATA, None))
4121
      else:
4122
        try:
4123
          self._CheckPayload(result)
4124
        except errors.OpExecError, err:
4125
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
4126
                          node.name, err)
4127
          node_entry.append((constants.RS_NODATA, None))
4128
        else:
4129
          if self.op.command == constants.OOB_HEALTH:
4130
            # For health we should log important events
4131
            for item, status in result.payload:
4132
              if status in [constants.OOB_STATUS_WARNING,
4133
                            constants.OOB_STATUS_CRITICAL]:
4134
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
4135
                                item, node.name, status)
4136

    
4137
          if self.op.command == constants.OOB_POWER_ON:
4138
            node.powered = True
4139
          elif self.op.command == constants.OOB_POWER_OFF:
4140
            node.powered = False
4141
          elif self.op.command == constants.OOB_POWER_STATUS:
4142
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4143
            if powered != node.powered:
4144
              logging.warning(("Recorded power state (%s) of node '%s' does not"
4145
                               " match actual power state (%s)"), node.powered,
4146
                              node.name, powered)
4147

    
4148
          # For configuration changing commands we should update the node
4149
          if self.op.command in (constants.OOB_POWER_ON,
4150
                                 constants.OOB_POWER_OFF):
4151
            self.cfg.Update(node, feedback_fn)
4152

    
4153
          node_entry.append((constants.RS_NORMAL, result.payload))
4154

    
4155
          if (self.op.command == constants.OOB_POWER_ON and
4156
              idx < len(self.nodes) - 1):
4157
            time.sleep(self.op.power_delay)
4158

    
4159
    return ret
4160

    
4161
  def _CheckPayload(self, result):
4162
    """Checks if the payload is valid.
4163

4164
    @param result: RPC result
4165
    @raises errors.OpExecError: If payload is not valid
4166

4167
    """
4168
    errs = []
4169
    if self.op.command == constants.OOB_HEALTH:
4170
      if not isinstance(result.payload, list):
4171
        errs.append("command 'health' is expected to return a list but got %s" %
4172
                    type(result.payload))
4173
      else:
4174
        for item, status in result.payload:
4175
          if status not in constants.OOB_STATUSES:
4176
            errs.append("health item '%s' has invalid status '%s'" %
4177
                        (item, status))
4178

    
4179
    if self.op.command == constants.OOB_POWER_STATUS:
4180
      if not isinstance(result.payload, dict):
4181
        errs.append("power-status is expected to return a dict but got %s" %
4182
                    type(result.payload))
4183

    
4184
    if self.op.command in [
4185
        constants.OOB_POWER_ON,
4186
        constants.OOB_POWER_OFF,
4187
        constants.OOB_POWER_CYCLE,
4188
        ]:
4189
      if result.payload is not None:
4190
        errs.append("%s is expected to not return payload but got '%s'" %
4191
                    (self.op.command, result.payload))
4192

    
4193
    if errs:
4194
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4195
                               utils.CommaJoin(errs))
4196

    
4197

    
4198
class _OsQuery(_QueryBase):
4199
  FIELDS = query.OS_FIELDS
4200

    
4201
  def ExpandNames(self, lu):
4202
    # Lock all nodes in shared mode
4203
    # Temporary removal of locks, should be reverted later
4204
    # TODO: reintroduce locks when they are lighter-weight
4205
    lu.needed_locks = {}
4206
    #self.share_locks[locking.LEVEL_NODE] = 1
4207
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4208

    
4209
    # The following variables interact with _QueryBase._GetNames
4210
    if self.names:
4211
      self.wanted = self.names
4212
    else:
4213
      self.wanted = locking.ALL_SET
4214

    
4215
    self.do_locking = self.use_locking
4216

    
4217
  def DeclareLocks(self, lu, level):
4218
    pass
4219

    
4220
  @staticmethod
4221
  def _DiagnoseByOS(rlist):
4222
    """Remaps a per-node return list into an a per-os per-node dictionary
4223

4224
    @param rlist: a map with node names as keys and OS objects as values
4225

4226
    @rtype: dict
4227
    @return: a dictionary with osnames as keys and as value another
4228
        map, with nodes as keys and tuples of (path, status, diagnose,
4229
        variants, parameters, api_versions) as values, eg::
4230

4231
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4232
                                     (/srv/..., False, "invalid api")],
4233
                           "node2": [(/srv/..., True, "", [], [])]}
4234
          }
4235

4236
    """
4237
    all_os = {}
4238
    # we build here the list of nodes that didn't fail the RPC (at RPC
4239
    # level), so that nodes with a non-responding node daemon don't
4240
    # make all OSes invalid
4241
    good_nodes = [node_name for node_name in rlist
4242
                  if not rlist[node_name].fail_msg]
4243
    for node_name, nr in rlist.items():
4244
      if nr.fail_msg or not nr.payload:
4245
        continue
4246
      for (name, path, status, diagnose, variants,
4247
           params, api_versions) in nr.payload:
4248
        if name not in all_os:
4249
          # build a list of nodes for this os containing empty lists
4250
          # for each node in node_list
4251
          all_os[name] = {}
4252
          for nname in good_nodes:
4253
            all_os[name][nname] = []
4254
        # convert params from [name, help] to (name, help)
4255
        params = [tuple(v) for v in params]
4256
        all_os[name][node_name].append((path, status, diagnose,
4257
                                        variants, params, api_versions))
4258
    return all_os
4259

    
4260
  def _GetQueryData(self, lu):
4261
    """Computes the list of nodes and their attributes.
4262

4263
    """
4264
    # Locking is not used
4265
    assert not (compat.any(lu.glm.is_owned(level)
4266
                           for level in locking.LEVELS
4267
                           if level != locking.LEVEL_CLUSTER) or
4268
                self.do_locking or self.use_locking)
4269

    
4270
    valid_nodes = [node.name
4271
                   for node in lu.cfg.GetAllNodesInfo().values()
4272
                   if not node.offline and node.vm_capable]
4273
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4274
    cluster = lu.cfg.GetClusterInfo()
4275

    
4276
    data = {}
4277

    
4278
    for (os_name, os_data) in pol.items():
4279
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4280
                          hidden=(os_name in cluster.hidden_os),
4281
                          blacklisted=(os_name in cluster.blacklisted_os))
4282

    
4283
      variants = set()
4284
      parameters = set()
4285
      api_versions = set()
4286

    
4287
      for idx, osl in enumerate(os_data.values()):
4288
        info.valid = bool(info.valid and osl and osl[0][1])
4289
        if not info.valid:
4290
          break
4291

    
4292
        (node_variants, node_params, node_api) = osl[0][3:6]
4293
        if idx == 0:
4294
          # First entry
4295
          variants.update(node_variants)
4296
          parameters.update(node_params)
4297
          api_versions.update(node_api)
4298
        else:
4299
          # Filter out inconsistent values
4300
          variants.intersection_update(node_variants)
4301
          parameters.intersection_update(node_params)
4302
          api_versions.intersection_update(node_api)
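          # Illustrative example (not in the original code): if one node
          # reports variants ["a", "b"] and another ["b", "c"], only "b"
          # remains, i.e. only values common to all nodes are exposed.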
4303

    
4304
      info.variants = list(variants)
4305
      info.parameters = list(parameters)
4306
      info.api_versions = list(api_versions)
4307

    
4308
      data[os_name] = info
4309

    
4310
    # Prepare data in requested order
4311
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4312
            if name in data]
4313

    
4314

    
4315
class LUOsDiagnose(NoHooksLU):
4316
  """Logical unit for OS diagnose/query.
4317

4318
  """
4319
  REQ_BGL = False
4320

    
4321
  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter
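  # Illustrative sketch (an assumption, not part of the original module): for
  # fields=["name"] and no name filter, the legacy status filter built above
  # is roughly
  #   [qlang.OP_AND,
  #    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
  #    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
  #    [qlang.OP_TRUE, "valid"]]
  # so hidden, blacklisted and invalid OSes are excluded by default.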
4347

    
4348
  def CheckArguments(self):
4349
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4350
                       self.op.output_fields, False)
4351

    
4352
  def ExpandNames(self):
4353
    self.oq.ExpandNames(self)
4354

    
4355
  def Exec(self, feedback_fn):
4356
    return self.oq.OldStyleQuery(self)
4357

    
4358

    
4359
class LUNodeRemove(LogicalUnit):
4360
  """Logical unit for removing a node.
4361

4362
  """
4363
  HPATH = "node-remove"
4364
  HTYPE = constants.HTYPE_NODE
4365

    
4366
  def BuildHooksEnv(self):
4367
    """Build hooks env.
4368

4369
    This doesn't run on the target node in the pre phase as a failed
4370
    node would then be impossible to remove.
4371

4372
    """
4373
    return {
4374
      "OP_TARGET": self.op.node_name,
4375
      "NODE_NAME": self.op.node_name,
4376
      }
4377

    
4378
  def BuildHooksNodes(self):
4379
    """Build hooks nodes.
4380

4381
    """
4382
    all_nodes = self.cfg.GetNodeList()
4383
    try:
4384
      all_nodes.remove(self.op.node_name)
4385
    except ValueError:
4386
      logging.warning("Node '%s', which is about to be removed, was not found"
4387
                      " in the list of all nodes", self.op.node_name)
4388
    return (all_nodes, all_nodes)
4389

    
4390
  def CheckPrereq(self):
4391
    """Check prerequisites.
4392

4393
    This checks:
4394
     - the node exists in the configuration
4395
     - it does not have primary or secondary instances
4396
     - it's not the master
4397

4398
    Any errors are signaled by raising errors.OpPrereqError.
4399

4400
    """
4401
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4402
    node = self.cfg.GetNodeInfo(self.op.node_name)
4403
    assert node is not None
4404

    
4405
    masternode = self.cfg.GetMasterNode()
4406
    if node.name == masternode:
4407
      raise errors.OpPrereqError("Node is the master node, failover to another"
4408
                                 " node is required", errors.ECODE_INVAL)
4409

    
4410
    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4411
      if node.name in instance.all_nodes:
4412
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4413
                                   " please remove first" % instance_name,
4414
                                   errors.ECODE_INVAL)
4415
    self.op.node_name = node.name
4416
    self.node = node
4417

    
4418
  def Exec(self, feedback_fn):
4419
    """Removes the node from the cluster.
4420

4421
    """
4422
    node = self.node
4423
    logging.info("Stopping the node daemon and removing configs from node %s",
4424
                 node.name)
4425

    
4426
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4427

    
4428
    # Promote nodes to master candidate as needed
4429
    _AdjustCandidatePool(self, exceptions=[node.name])
4430
    self.context.RemoveNode(node.name)
4431

    
4432
    # Run post hooks on the node before it's removed
4433
    _RunPostHook(self, node.name)
4434

    
4435
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4436
    msg = result.fail_msg
4437
    if msg:
4438
      self.LogWarning("Errors encountered on the remote node while leaving"
4439
                      " the cluster: %s", msg)
4440

    
4441
    # Remove node from our /etc/hosts
4442
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4443
      master_node = self.cfg.GetMasterNode()
4444
      result = self.rpc.call_etc_hosts_modify(master_node,
4445
                                              constants.ETC_HOSTS_REMOVE,
4446
                                              node.name, None)
4447
      result.Raise("Can't update hosts file with new host data")
4448
      _RedistributeAncillaryFiles(self)
4449

    
4450

    
4451
class _NodeQuery(_QueryBase):
4452
  FIELDS = query.NODE_FIELDS
4453

    
4454
  def ExpandNames(self, lu):
4455
    lu.needed_locks = {}
4456
    lu.share_locks = _ShareAll()
4457

    
4458
    if self.names:
4459
      self.wanted = _GetWantedNodes(lu, self.names)
4460
    else:
4461
      self.wanted = locking.ALL_SET
4462

    
4463
    self.do_locking = (self.use_locking and
4464
                       query.NQ_LIVE in self.requested_data)
4465

    
4466
    if self.do_locking:
4467
      # If any non-static field is requested we need to lock the nodes
4468
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
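    # Note (illustrative, not in the original code): when only static fields
    # such as "name" are requested, query.NQ_LIVE is not in requested_data,
    # so do_locking stays False and no node locks are taken even if
    # use_locking was given.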
4469

    
4470
  def DeclareLocks(self, lu, level):
4471
    pass
4472

    
4473
  def _GetQueryData(self, lu):
4474
    """Computes the list of nodes and their attributes.
4475

4476
    """
4477
    all_info = lu.cfg.GetAllNodesInfo()
4478

    
4479
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4480

    
4481
    # Gather data as requested
4482
    if query.NQ_LIVE in self.requested_data:
4483
      # filter out non-vm_capable nodes
4484
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4485

    
4486
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4487
                                        lu.cfg.GetHypervisorType())
4488
      live_data = dict((name, nresult.payload)
4489
                       for (name, nresult) in node_data.items()
4490
                       if not nresult.fail_msg and nresult.payload)
4491
    else:
4492
      live_data = None
4493

    
4494
    if query.NQ_INST in self.requested_data:
4495
      node_to_primary = dict([(name, set()) for name in nodenames])
4496
      node_to_secondary = dict([(name, set()) for name in nodenames])
4497

    
4498
      inst_data = lu.cfg.GetAllInstancesInfo()
4499

    
4500
      for inst in inst_data.values():
4501
        if inst.primary_node in node_to_primary:
4502
          node_to_primary[inst.primary_node].add(inst.name)
4503
        for secnode in inst.secondary_nodes:
4504
          if secnode in node_to_secondary:
4505
            node_to_secondary[secnode].add(inst.name)
4506
    else:
4507
      node_to_primary = None
4508
      node_to_secondary = None
4509

    
4510
    if query.NQ_OOB in self.requested_data:
4511
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4512
                         for name, node in all_info.iteritems())
4513
    else:
4514
      oob_support = None
4515

    
4516
    if query.NQ_GROUP in self.requested_data:
4517
      groups = lu.cfg.GetAllNodeGroupsInfo()
4518
    else:
4519
      groups = {}
4520

    
4521
    return query.NodeQueryData([all_info[name] for name in nodenames],
4522
                               live_data, lu.cfg.GetMasterNode(),
4523
                               node_to_primary, node_to_secondary, groups,
4524
                               oob_support, lu.cfg.GetClusterInfo())
4525

    
4526

    
4527
class LUNodeQuery(NoHooksLU):
4528
  """Logical unit for querying nodes.
4529

4530
  """
4531
  # pylint: disable=W0142
4532
  REQ_BGL = False
4533

    
4534
  def CheckArguments(self):
4535
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4536
                         self.op.output_fields, self.op.use_locking)
4537

    
4538
  def ExpandNames(self):
4539
    self.nq.ExpandNames(self)
4540

    
4541
  def Exec(self, feedback_fn):
4542
    return self.nq.OldStyleQuery(self)
4543

    
4544

    
4545
class LUNodeQueryvols(NoHooksLU):
4546
  """Logical unit for getting volumes on node(s).
4547

4548
  """
4549
  REQ_BGL = False
4550
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4551
  _FIELDS_STATIC = utils.FieldSet("node")
4552

    
4553
  def CheckArguments(self):
4554
    _CheckOutputFields(static=self._FIELDS_STATIC,
4555
                       dynamic=self._FIELDS_DYNAMIC,
4556
                       selected=self.op.output_fields)
4557

    
4558
  def ExpandNames(self):
4559
    self.needed_locks = {}
4560
    self.share_locks[locking.LEVEL_NODE] = 1
4561
    if not self.op.nodes:
4562
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4563
    else:
4564
      self.needed_locks[locking.LEVEL_NODE] = \
4565
        _GetWantedNodes(self, self.op.nodes)
4566

    
4567
  def Exec(self, feedback_fn):
4568
    """Computes the list of nodes and their attributes.
4569

4570
    """
4571
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4572
    volumes = self.rpc.call_node_volumes(nodenames)
4573

    
4574
    ilist = self.cfg.GetAllInstancesInfo()
4575
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4576

    
4577
    output = []
4578
    for node in nodenames:
4579
      nresult = volumes[node]
4580
      if nresult.offline:
4581
        continue
4582
      msg = nresult.fail_msg
4583
      if msg:
4584
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4585
        continue
4586

    
4587
      node_vols = sorted(nresult.payload,
4588
                         key=operator.itemgetter("dev"))
4589

    
4590
      for vol in node_vols:
4591
        node_output = []
4592
        for field in self.op.output_fields:
4593
          if field == "node":
4594
            val = node
4595
          elif field == "phys":
4596
            val = vol["dev"]
4597
          elif field == "vg":
4598
            val = vol["vg"]
4599
          elif field == "name":
4600
            val = vol["name"]
4601
          elif field == "size":
4602
            val = int(float(vol["size"]))
4603
          elif field == "instance":
4604
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4605
          else:
4606
            raise errors.ParameterError(field)
4607
          node_output.append(str(val))
4608

    
4609
        output.append(node_output)
4610

    
4611
    return output
4612

    
4613

    
4614
class LUNodeQueryStorage(NoHooksLU):
4615
  """Logical unit for getting information on storage units on node(s).
4616

4617
  """
4618
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4619
  REQ_BGL = False
4620

    
4621
  def CheckArguments(self):
4622
    _CheckOutputFields(static=self._FIELDS_STATIC,
4623
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4624
                       selected=self.op.output_fields)
4625

    
4626
  def ExpandNames(self):
4627
    self.needed_locks = {}
4628
    self.share_locks[locking.LEVEL_NODE] = 1
4629

    
4630
    if self.op.nodes:
4631
      self.needed_locks[locking.LEVEL_NODE] = \
4632
        _GetWantedNodes(self, self.op.nodes)
4633
    else:
4634
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4635

    
4636
  def Exec(self, feedback_fn):
4637
    """Computes the list of nodes and their attributes.
4638

4639
    """
4640
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4641

    
4642
    # Always get name to sort by
4643
    if constants.SF_NAME in self.op.output_fields:
4644
      fields = self.op.output_fields[:]
4645
    else:
4646
      fields = [constants.SF_NAME] + self.op.output_fields
4647

    
4648
    # Never ask for node or type as it's only known to the LU
4649
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4650
      while extra in fields:
4651
        fields.remove(extra)
4652

    
4653
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4654
    name_idx = field_idx[constants.SF_NAME]
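    # Illustrative example (an assumption, not from the original code): if
    # self.op.output_fields were [constants.SF_NODE, "size"], the RPC below
    # would be asked for ["name", "size"] (node/type are filled in locally),
    # giving field_idx == {"name": 0, "size": 1} and name_idx == 0.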
4655

    
4656
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4657
    data = self.rpc.call_storage_list(self.nodes,
4658
                                      self.op.storage_type, st_args,
4659
                                      self.op.name, fields)
4660

    
4661
    result = []
4662

    
4663
    for node in utils.NiceSort(self.nodes):
4664
      nresult = data[node]
4665
      if nresult.offline:
4666
        continue
4667

    
4668
      msg = nresult.fail_msg
4669
      if msg:
4670
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4671
        continue
4672

    
4673
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4674

    
4675
      for name in utils.NiceSort(rows.keys()):
4676
        row = rows[name]
4677

    
4678
        out = []
4679

    
4680
        for field in self.op.output_fields:
4681
          if field == constants.SF_NODE:
4682
            val = node
4683
          elif field == constants.SF_TYPE:
4684
            val = self.op.storage_type
4685
          elif field in field_idx:
4686
            val = row[field_idx[field]]
4687
          else:
4688
            raise errors.ParameterError(field)
4689

    
4690
          out.append(val)
4691

    
4692
        result.append(out)
4693

    
4694
    return result
4695

    
4696

    
4697
class _InstanceQuery(_QueryBase):
4698
  FIELDS = query.INSTANCE_FIELDS
4699

    
4700
  def ExpandNames(self, lu):
4701
    lu.needed_locks = {}
4702
    lu.share_locks = _ShareAll()
4703

    
4704
    if self.names:
4705
      self.wanted = _GetWantedInstances(lu, self.names)
4706
    else:
4707
      self.wanted = locking.ALL_SET
4708

    
4709
    self.do_locking = (self.use_locking and
4710
                       query.IQ_LIVE in self.requested_data)
4711
    if self.do_locking:
4712
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4713
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4714
      lu.needed_locks[locking.LEVEL_NODE] = []
4715
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4716

    
4717
    self.do_grouplocks = (self.do_locking and
4718
                          query.IQ_NODES in self.requested_data)
4719

    
4720
  def DeclareLocks(self, lu, level):
4721
    if self.do_locking:
4722
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4723
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4724

    
4725
        # Lock all groups used by instances optimistically; this requires going
4726
        # via the node before it's locked, requiring verification later on
4727
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4728
          set(group_uuid
4729
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4730
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4731
      elif level == locking.LEVEL_NODE:
4732
        lu._LockInstancesNodes() # pylint: disable=W0212
4733

    
4734
  @staticmethod
4735
  def _CheckGroupLocks(lu):
4736
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4737
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4738

    
4739
    # Check if node groups for locked instances are still correct
4740
    for instance_name in owned_instances:
4741
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4742

    
4743
  def _GetQueryData(self, lu):
4744
    """Computes the list of instances and their attributes.
4745

4746
    """
4747
    if self.do_grouplocks:
4748
      self._CheckGroupLocks(lu)
4749

    
4750
    cluster = lu.cfg.GetClusterInfo()
4751
    all_info = lu.cfg.GetAllInstancesInfo()
4752

    
4753
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4754

    
4755
    instance_list = [all_info[name] for name in instance_names]
4756
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4757
                                        for inst in instance_list)))
4758
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4759
    bad_nodes = []
4760
    offline_nodes = []
4761
    wrongnode_inst = set()
4762

    
4763
    # Gather data as requested
4764
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4765
      live_data = {}
4766
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4767
      for name in nodes:
4768
        result = node_data[name]
4769
        if result.offline:
4770
          # offline nodes will be in both lists
4771
          assert result.fail_msg
4772
          offline_nodes.append(name)
4773
        if result.fail_msg:
4774
          bad_nodes.append(name)
4775
        elif result.payload:
4776
          for inst in result.payload:
4777
            if inst in all_info:
4778
              if all_info[inst].primary_node == name:
4779
                live_data.update(result.payload)
4780
              else:
4781
                wrongnode_inst.add(inst)
4782
            else:
4783
              # orphan instance; we don't list it here as we don't
4784
              # handle this case yet in the output of instance listing
4785
              logging.warning("Orphan instance '%s' found on node %s",
4786
                              inst, name)
4787
        # else no instance is alive
4788
    else:
4789
      live_data = {}
4790

    
4791
    if query.IQ_DISKUSAGE in self.requested_data:
4792
      disk_usage = dict((inst.name,
4793
                         _ComputeDiskSize(inst.disk_template,
4794
                                          [{constants.IDISK_SIZE: disk.size}
4795
                                           for disk in inst.disks]))
4796
                        for inst in instance_list)
4797
    else:
4798
      disk_usage = None
4799

    
4800
    if query.IQ_CONSOLE in self.requested_data:
4801
      consinfo = {}
4802
      for inst in instance_list:
4803
        if inst.name in live_data:
4804
          # Instance is running
4805
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4806
        else:
4807
          consinfo[inst.name] = None
4808
      assert set(consinfo.keys()) == set(instance_names)
4809
    else:
4810
      consinfo = None
4811

    
4812
    if query.IQ_NODES in self.requested_data:
4813
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4814
                                            instance_list)))
4815
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4816
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4817
                    for uuid in set(map(operator.attrgetter("group"),
4818
                                        nodes.values())))
4819
    else:
4820
      nodes = None
4821
      groups = None
4822

    
4823
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4824
                                   disk_usage, offline_nodes, bad_nodes,
4825
                                   live_data, wrongnode_inst, consinfo,
4826
                                   nodes, groups)
4827

    
4828

    
4829
class LUQuery(NoHooksLU):
4830
  """Query for resources/items of a certain kind.
4831

4832
  """
4833
  # pylint: disable=W0142
4834
  REQ_BGL = False
4835

    
4836
  def CheckArguments(self):
4837
    qcls = _GetQueryImplementation(self.op.what)
4838

    
4839
    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4840

    
4841
  def ExpandNames(self):
4842
    self.impl.ExpandNames(self)
4843

    
4844
  def DeclareLocks(self, level):
4845
    self.impl.DeclareLocks(self, level)
4846

    
4847
  def Exec(self, feedback_fn):
4848
    return self.impl.NewStyleQuery(self)
4849

    
4850

    
4851
class LUQueryFields(NoHooksLU):
4852
  """Query for resources/items of a certain kind.
4853

4854
  """
4855
  # pylint: disable=W0142
4856
  REQ_BGL = False
4857

    
4858
  def CheckArguments(self):
4859
    self.qcls = _GetQueryImplementation(self.op.what)
4860

    
4861
  def ExpandNames(self):
4862
    self.needed_locks = {}
4863

    
4864
  def Exec(self, feedback_fn):
4865
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4866

    
4867

    
4868
class LUNodeModifyStorage(NoHooksLU):
4869
  """Logical unit for modifying a storage volume on a node.
4870

4871
  """
4872
  REQ_BGL = False
4873

    
4874
  def CheckArguments(self):
4875
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4876

    
4877
    storage_type = self.op.storage_type
4878

    
4879
    try:
4880
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4881
    except KeyError:
4882
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4883
                                 " modified" % storage_type,
4884
                                 errors.ECODE_INVAL)
4885

    
4886
    diff = set(self.op.changes.keys()) - modifiable
4887
    if diff:
4888
      raise errors.OpPrereqError("The following fields can not be modified for"
4889
                                 " storage units of type '%s': %r" %
4890
                                 (storage_type, list(diff)),
4891
                                 errors.ECODE_INVAL)
4892

    
4893
  def ExpandNames(self):
4894
    self.needed_locks = {
4895
      locking.LEVEL_NODE: self.op.node_name,
4896
      }
4897

    
4898
  def Exec(self, feedback_fn):
4899
    """Computes the list of nodes and their attributes.
4900

4901
    """
4902
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4903
    result = self.rpc.call_storage_modify(self.op.node_name,
4904
                                          self.op.storage_type, st_args,
4905
                                          self.op.name, self.op.changes)
4906
    result.Raise("Failed to modify storage unit '%s' on %s" %
4907
                 (self.op.name, self.op.node_name))
4908

    
4909

    
4910
class LUNodeAdd(LogicalUnit):
4911
  """Logical unit for adding node to the cluster.
4912

4913
  """
4914
  HPATH = "node-add"
4915
  HTYPE = constants.HTYPE_NODE
4916
  _NFLAGS = ["master_capable", "vm_capable"]
4917

    
4918
  def CheckArguments(self):
4919
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4920
    # validate/normalize the node name
4921
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4922
                                         family=self.primary_ip_family)
4923
    self.op.node_name = self.hostname.name
4924

    
4925
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4926
      raise errors.OpPrereqError("Cannot readd the master node",
4927
                                 errors.ECODE_STATE)
4928

    
4929
    if self.op.readd and self.op.group:
4930
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4931
                                 " being readded", errors.ECODE_INVAL)
4932

    
4933
  def BuildHooksEnv(self):
4934
    """Build hooks env.
4935

4936
    This will run on all nodes before, and on all nodes + the new node after.
4937

4938
    """
4939
    return {
4940
      "OP_TARGET": self.op.node_name,
4941
      "NODE_NAME": self.op.node_name,
4942
      "NODE_PIP": self.op.primary_ip,
4943
      "NODE_SIP": self.op.secondary_ip,
4944
      "MASTER_CAPABLE": str(self.op.master_capable),
4945
      "VM_CAPABLE": str(self.op.vm_capable),
4946
      }
4947

    
4948
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)
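  # Illustrative example (not part of the original code): on a cluster whose
  # node list is ["node1", "node2"], adding "node3" runs the pre-phase hooks
  # on node1 and node2 only, while the post-phase hooks also include node3.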
4957

    
4958
  def CheckPrereq(self):
4959
    """Check prerequisites.
4960

4961
    This checks:
4962
     - the new node is not already in the config
4963
     - it is resolvable
4964
     - its parameters (single/dual homed) matches the cluster
4965

4966
    Any errors are signaled by raising errors.OpPrereqError.
4967

4968
    """
4969
    cfg = self.cfg
4970
    hostname = self.hostname
4971
    node = hostname.name
4972
    primary_ip = self.op.primary_ip = hostname.ip
4973
    if self.op.secondary_ip is None:
4974
      if self.primary_ip_family == netutils.IP6Address.family:
4975
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4976
                                   " IPv4 address must be given as secondary",
4977
                                   errors.ECODE_INVAL)
4978
      self.op.secondary_ip = primary_ip
4979

    
4980
    secondary_ip = self.op.secondary_ip
4981
    if not netutils.IP4Address.IsValid(secondary_ip):
4982
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4983
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4984

    
4985
    node_list = cfg.GetNodeList()
4986
    if not self.op.readd and node in node_list:
4987
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4988
                                 node, errors.ECODE_EXISTS)
4989
    elif self.op.readd and node not in node_list:
4990
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4991
                                 errors.ECODE_NOENT)
4992

    
4993
    self.changed_primary_ip = False
4994

    
4995
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4996
      if self.op.readd and node == existing_node_name:
4997
        if existing_node.secondary_ip != secondary_ip:
4998
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4999
                                     " address configuration as before",
5000
                                     errors.ECODE_INVAL)
5001
        if existing_node.primary_ip != primary_ip:
5002
          self.changed_primary_ip = True
5003

    
5004
        continue
5005

    
5006
      if (existing_node.primary_ip == primary_ip or
5007
          existing_node.secondary_ip == primary_ip or
5008
          existing_node.primary_ip == secondary_ip or
5009
          existing_node.secondary_ip == secondary_ip):
5010
        raise errors.OpPrereqError("New node ip address(es) conflict with"
5011
                                   " existing node %s" % existing_node.name,
5012
                                   errors.ECODE_NOTUNIQUE)
5013

    
5014
    # After this 'if' block, None is no longer a valid value for the
5015
    # _capable op attributes
5016
    if self.op.readd:
5017
      old_node = self.cfg.GetNodeInfo(node)
5018
      assert old_node is not None, "Can't retrieve locked node %s" % node
5019
      for attr in self._NFLAGS:
5020
        if getattr(self.op, attr) is None:
5021
          setattr(self.op, attr, getattr(old_node, attr))
5022
    else:
5023
      for attr in self._NFLAGS:
5024
        if getattr(self.op, attr) is None:
5025
          setattr(self.op, attr, True)
5026

    
5027
    if self.op.readd and not self.op.vm_capable:
5028
      pri, sec = cfg.GetNodeInstances(node)
5029
      if pri or sec:
5030
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5031
                                   " flag set to false, but it already holds"
5032
                                   " instances" % node,
5033
                                   errors.ECODE_STATE)
5034

    
5035
    # check that the type of the node (single versus dual homed) is the
5036
    # same as for the master
5037
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5038
    master_singlehomed = myself.secondary_ip == myself.primary_ip
5039
    newbie_singlehomed = secondary_ip == primary_ip
5040
    if master_singlehomed != newbie_singlehomed:
5041
      if master_singlehomed:
5042
        raise errors.OpPrereqError("The master has no secondary ip but the"
5043
                                   " new node has one",
5044
                                   errors.ECODE_INVAL)
5045
      else:
5046
        raise errors.OpPrereqError("The master has a secondary ip but the"
5047
                                   " new node doesn't have one",
5048
                                   errors.ECODE_INVAL)
5049

    
5050
    # checks reachability
5051
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5052
      raise errors.OpPrereqError("Node not reachable by ping",
5053
                                 errors.ECODE_ENVIRON)
5054

    
5055
    if not newbie_singlehomed:
5056
      # check reachability from my secondary ip to newbie's secondary ip
5057
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5058
                           source=myself.secondary_ip):
5059
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5060
                                   " based ping to node daemon port",
5061
                                   errors.ECODE_ENVIRON)
5062

    
5063
    if self.op.readd:
5064
      exceptions = [node]
5065
    else:
5066
      exceptions = []
5067

    
5068
    if self.op.master_capable:
5069
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5070
    else:
5071
      self.master_candidate = False
5072

    
5073
    if self.op.readd:
5074
      self.new_node = old_node
5075
    else:
5076
      node_group = cfg.LookupNodeGroup(self.op.group)
5077
      self.new_node = objects.Node(name=node,
5078
                                   primary_ip=primary_ip,
5079
                                   secondary_ip=secondary_ip,
5080
                                   master_candidate=self.master_candidate,
5081
                                   offline=False, drained=False,
5082
                                   group=node_group)
5083

    
5084
    if self.op.ndparams:
5085
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5086

    
5087
  def Exec(self, feedback_fn):
5088
    """Adds the new node to the cluster.
5089

5090
    """
5091
    new_node = self.new_node
5092
    node = new_node.name
5093

    
5094
    # We are adding a new node, so we assume it is powered
5095
    new_node.powered = True
5096

    
5097
    # for re-adds, reset the offline/drained/master-candidate flags;
5098
    # we need to reset here, otherwise offline would prevent RPC calls
5099
    # later in the procedure; this also means that if the re-add
5100
    # fails, we are left with a non-offlined, broken node
5101
    if self.op.readd:
5102
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5103
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5104
      # if we demote the node, we do cleanup later in the procedure
5105
      new_node.master_candidate = self.master_candidate
5106
      if self.changed_primary_ip:
5107
        new_node.primary_ip = self.op.primary_ip
5108

    
5109
    # copy the master/vm_capable flags
5110
    for attr in self._NFLAGS:
5111
      setattr(new_node, attr, getattr(self.op, attr))
5112

    
5113
    # notify the user about any possible mc promotion
5114
    if new_node.master_candidate:
5115
      self.LogInfo("Node will be a master candidate")
5116

    
5117
    if self.op.ndparams:
5118
      new_node.ndparams = self.op.ndparams
5119
    else:
5120
      new_node.ndparams = {}
5121

    
5122
    # check connectivity
5123
    result = self.rpc.call_version([node])[node]
5124
    result.Raise("Can't get version information from node %s" % node)
5125
    if constants.PROTOCOL_VERSION == result.payload:
5126
      logging.info("Communication to node %s fine, sw version %s match",
5127
                   node, result.payload)
5128
    else:
5129
      raise errors.OpExecError("Version mismatch master version %s,"
5130
                               " node version %s" %
5131
                               (constants.PROTOCOL_VERSION, result.payload))
5132

    
5133
    # Add node to our /etc/hosts, and add key to known_hosts
5134
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5135
      master_node = self.cfg.GetMasterNode()
5136
      result = self.rpc.call_etc_hosts_modify(master_node,
5137
                                              constants.ETC_HOSTS_ADD,
5138
                                              self.hostname.name,
5139
                                              self.hostname.ip)
5140
      result.Raise("Can't update hosts file with new host data")
5141

    
5142
    if new_node.secondary_ip != new_node.primary_ip:
5143
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5144
                               False)
5145

    
5146
    node_verify_list = [self.cfg.GetMasterNode()]
5147
    node_verify_param = {
5148
      constants.NV_NODELIST: ([node], {}),
5149
      # TODO: do a node-net-test as well?
5150
    }
5151

    
5152
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5153
                                       self.cfg.GetClusterName())
5154
    for verifier in node_verify_list:
5155
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5156
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5157
      if nl_payload:
5158
        for failed in nl_payload:
5159
          feedback_fn("ssh/hostname verification failed"
5160
                      " (checking from %s): %s" %
5161
                      (verifier, nl_payload[failed]))
5162
        raise errors.OpExecError("ssh/hostname verification failed")
5163

    
5164
    if self.op.readd:
5165
      _RedistributeAncillaryFiles(self)
5166
      self.context.ReaddNode(new_node)
5167
      # make sure we redistribute the config
5168
      self.cfg.Update(new_node, feedback_fn)
5169
      # and make sure the new node will not have old files around
5170
      if not new_node.master_candidate:
5171
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5172
        msg = result.fail_msg
5173
        if msg:
5174
          self.LogWarning("Node failed to demote itself from master"
5175
                          " candidate status: %s" % msg)
5176
    else:
5177
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5178
                                  additional_vm=self.op.vm_capable)
5179
      self.context.AddNode(new_node, self.proc.GetECId())
5180

    
5181

    
5182
class LUNodeSetParams(LogicalUnit):
5183
  """Modifies the parameters of a node.
5184

5185
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5186
      to the node role (as _ROLE_*)
5187
  @cvar _R2F: a dictionary from node role to tuples of flags
5188
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5189

5190
  """
5191
  HPATH = "node-modify"
5192
  HTYPE = constants.HTYPE_NODE
5193
  REQ_BGL = False
5194
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5195
  _F2R = {
5196
    (True, False, False): _ROLE_CANDIDATE,
5197
    (False, True, False): _ROLE_DRAINED,
5198
    (False, False, True): _ROLE_OFFLINE,
5199
    (False, False, False): _ROLE_REGULAR,
5200
    }
5201
  _R2F = dict((v, k) for k, v in _F2R.items())
5202
  _FLAGS = ["master_candidate", "drained", "offline"]
5203

    
5204
  def CheckArguments(self):
5205
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5206
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5207
                self.op.master_capable, self.op.vm_capable,
5208
                self.op.secondary_ip, self.op.ndparams]
5209
    if all_mods.count(None) == len(all_mods):
5210
      raise errors.OpPrereqError("Please pass at least one modification",
5211
                                 errors.ECODE_INVAL)
5212
    if all_mods.count(True) > 1:
5213
      raise errors.OpPrereqError("Can't set the node into more than one"
5214
                                 " state at the same time",
5215
                                 errors.ECODE_INVAL)
5216

    
5217
    # Boolean value that tells us whether we might be demoting from MC
5218
    self.might_demote = (self.op.master_candidate == False or
5219
                         self.op.offline == True or
5220
                         self.op.drained == True or
5221
                         self.op.master_capable == False)
5222

    
5223
    if self.op.secondary_ip:
5224
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5225
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5226
                                   " address" % self.op.secondary_ip,
5227
                                   errors.ECODE_INVAL)
5228

    
5229
    self.lock_all = self.op.auto_promote and self.might_demote
5230
    self.lock_instances = self.op.secondary_ip is not None
5231

    
5232
  def ExpandNames(self):
5233
    if self.lock_all:
5234
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5235
    else:
5236
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5237

    
5238
    if self.lock_instances:
5239
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5240

    
5241
  def DeclareLocks(self, level):
5242
    # If we have locked all instances, before waiting to lock nodes, release
5243
    # all the ones living on nodes unrelated to the current operation.
5244
    if level == locking.LEVEL_NODE and self.lock_instances:
5245
      self.affected_instances = []
5246
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5247
        instances_keep = []
5248

    
5249
        # Build list of instances to release
5250
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5251
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5252
          if (instance.disk_template in constants.DTS_INT_MIRROR and
5253
              self.op.node_name in instance.all_nodes):
5254
            instances_keep.append(instance_name)
5255
            self.affected_instances.append(instance)
5256

    
5257
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5258

    
5259
        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5260
                set(instances_keep))
5261

    
5262
  def BuildHooksEnv(self):
5263
    """Build hooks env.
5264

5265
    This runs on the master node.
5266

5267
    """
5268
    return {
5269
      "OP_TARGET": self.op.node_name,
5270
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5271
      "OFFLINE": str(self.op.offline),
5272
      "DRAINED": str(self.op.drained),
5273
      "MASTER_CAPABLE": str(self.op.master_capable),
5274
      "VM_CAPABLE": str(self.op.vm_capable),
5275
      }
5276

    
5277
  def BuildHooksNodes(self):
5278
    """Build hooks nodes.
5279

5280
    """
5281
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5282
    return (nl, nl)
5283

    
5284
  def CheckPrereq(self):
5285
    """Check prerequisites.
5286

5287
    This only checks the instance list against the existing names.
5288

5289
    """
5290
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5291

    
5292
    if (self.op.master_candidate is not None or
5293
        self.op.drained is not None or
5294
        self.op.offline is not None):
5295
      # we can't change the master's node flags
5296
      if self.op.node_name == self.cfg.GetMasterNode():
5297
        raise errors.OpPrereqError("The master role can be changed"
5298
                                   " only via master-failover",
5299
                                   errors.ECODE_INVAL)
5300

    
5301
    if self.op.master_candidate and not node.master_capable:
5302
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5303
                                 " it a master candidate" % node.name,
5304
                                 errors.ECODE_STATE)
5305

    
5306
    if self.op.vm_capable == False:
5307
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5308
      if ipri or isec:
5309
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5310
                                   " the vm_capable flag" % node.name,
5311
                                   errors.ECODE_STATE)
5312

    
5313
    if node.master_candidate and self.might_demote and not self.lock_all:
5314
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5315
      # check if after removing the current node, we're missing master
5316
      # candidates
5317
      (mc_remaining, mc_should, _) = \
5318
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5319
      if mc_remaining < mc_should:
5320
        raise errors.OpPrereqError("Not enough master candidates, please"
5321
                                   " pass auto promote option to allow"
5322
                                   " promotion", errors.ECODE_STATE)
5323

    
5324
    self.old_flags = old_flags = (node.master_candidate,
5325
                                  node.drained, node.offline)
5326
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5327
    self.old_role = old_role = self._F2R[old_flags]
5328

    
5329
    # Check for ineffective changes
5330
    for attr in self._FLAGS:
5331
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5332
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5333
        setattr(self.op, attr, None)
5334

    
5335
    # Past this point, any flag change to False means a transition
5336
    # away from the respective state, as only real changes are kept
5337

    
5338
    # TODO: We might query the real power state if it supports OOB
5339
    if _SupportsOob(self.cfg, node):
5340
      if self.op.offline is False and not (node.powered or
5341
                                           self.op.powered == True):
5342
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5343
                                    " offline status can be reset") %
5344
                                   self.op.node_name)
5345
    elif self.op.powered is not None:
5346
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5347
                                  " as it does not support out-of-band"
5348
                                  " handling") % self.op.node_name)
5349

    
5350
    # If we're being de-offlined or un-drained, we'll promote ourselves to
    # master candidate if needed
5351
    if (self.op.drained == False or self.op.offline == False or
5352
        (self.op.master_capable and not node.master_capable)):
5353
      if _DecideSelfPromotion(self):
5354
        self.op.master_candidate = True
5355
        self.LogInfo("Auto-promoting node to master candidate")
5356

    
5357
    # If we're no longer master capable, we'll demote ourselves from MC
5358
    if self.op.master_capable == False and node.master_candidate:
5359
      self.LogInfo("Demoting from master candidate")
5360
      self.op.master_candidate = False
5361

    
5362
    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role
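    # Illustrative example (not in the original code): for a currently
    # drained node (old_role == self._ROLE_DRAINED), an opcode passing only
    # drained=False hits the "False in new flags" branch above and yields
    # new_role == self._ROLE_REGULAR.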
5376

    
5377
    self.new_role = new_role
5378

    
5379
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5380
      # Trying to transition out of offline status
5381
      # TODO: Use standard RPC runner, but make sure it works when the node is
5382
      # still marked offline
5383
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5384
      if result.fail_msg:
5385
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5386
                                   " to report its version: %s" %
5387
                                   (node.name, result.fail_msg),
5388
                                   errors.ECODE_STATE)
5389
      else:
5390
        self.LogWarning("Transitioning node from offline to online state"
5391
                        " without using re-add. Please make sure the node"
5392
                        " is healthy!")
5393

    
5394
    if self.op.secondary_ip:
5395
      # Ok even without locking, because this can't be changed by any LU
5396
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5397
      master_singlehomed = master.secondary_ip == master.primary_ip
5398
      if master_singlehomed and self.op.secondary_ip:
5399
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5400
                                   " homed cluster", errors.ECODE_INVAL)
5401

    
5402
      if node.offline:
5403
        if self.affected_instances:
5404
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5405
                                     " node has instances (%s) configured"
5406
                                     " to use it" % self.affected_instances)
5407
      else:
5408
        # On online nodes, check that no instances are running, and that
5409
        # the node has the new ip and we can reach it.
5410
        for instance in self.affected_instances:
5411
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5412

    
5413
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5414
        if master.name != node.name:
5415
          # check reachability from master secondary ip to new secondary ip
5416
          if not netutils.TcpPing(self.op.secondary_ip,
5417
                                  constants.DEFAULT_NODED_PORT,
5418
                                  source=master.secondary_ip):
5419
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5420
                                       " based ping to node daemon port",
5421
                                       errors.ECODE_ENVIRON)
5422

    
5423
    if self.op.ndparams:
5424
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5425
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5426
      self.new_ndparams = new_ndparams
5427

    
5428
  def Exec(self, feedback_fn):
5429
    """Modifies a node.
5430

5431
    """
5432
    node = self.node
5433
    old_role = self.old_role
5434
    new_role = self.new_role
5435

    
5436
    result = []
5437

    
5438
    if self.op.ndparams:
5439
      node.ndparams = self.new_ndparams
5440

    
5441
    if self.op.powered is not None:
5442
      node.powered = self.op.powered
5443

    
5444
    for attr in ["master_capable", "vm_capable"]:
5445
      val = getattr(self.op, attr)
5446
      if val is not None:
5447
        setattr(node, attr, val)
5448
        result.append((attr, str(val)))
5449

    
5450
    if new_role != old_role:
5451
      # Tell the node to demote itself, if no longer MC and not offline
5452
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5453
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5454
        if msg:
5455
          self.LogWarning("Node failed to demote itself: %s", msg)
5456

    
5457
      new_flags = self._R2F[new_role]
5458
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5459
        if of != nf:
5460
          result.append((desc, str(nf)))
5461
      (node.master_candidate, node.drained, node.offline) = new_flags
5462

    
5463
      # we locked all nodes, we adjust the CP before updating this node
5464
      if self.lock_all:
5465
        _AdjustCandidatePool(self, [node.name])
5466

    
5467
    if self.op.secondary_ip:
5468
      node.secondary_ip = self.op.secondary_ip
5469
      result.append(("secondary_ip", self.op.secondary_ip))
5470

    
5471
    # this will trigger configuration file update, if needed
5472
    self.cfg.Update(node, feedback_fn)
5473

    
5474
    # this will trigger job queue propagation or cleanup if the mc
5475
    # flag changed
5476
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5477
      self.context.ReaddNode(node)
5478

    
5479
    return result
5480

    
5481

    
5482
class LUNodePowercycle(NoHooksLU):
5483
  """Powercycles a node.
5484

5485
  """
5486
  REQ_BGL = False
5487

    
5488
  def CheckArguments(self):
5489
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5490
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5491
      raise errors.OpPrereqError("The node is the master and the force"
5492
                                 " parameter was not set",
5493
                                 errors.ECODE_INVAL)
5494

    
5495
  def ExpandNames(self):
5496
    """Locking for PowercycleNode.
5497

5498
    This is a last-resort option and shouldn't block on other
5499
    jobs. Therefore, we grab no locks.
5500

5501
    """
5502
    self.needed_locks = {}
5503

    
5504
  def Exec(self, feedback_fn):
5505
    """Reboots a node.
5506

5507
    """
5508
    result = self.rpc.call_node_powercycle(self.op.node_name,
5509
                                           self.cfg.GetHypervisorType())
5510
    result.Raise("Failed to schedule the reboot")
5511
    return result.payload
5512

    
5513

    
5514
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


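# Illustrative note (not part of the original module): LUClusterConfigQuery
# returns one entry per requested field, in the same order as
# self.op.output_fields.  A client-side sketch, assuming the matching opcode
# class is opcodes.OpClusterConfigQuery, could look like:
#
#   op = opcodes.OpClusterConfigQuery(output_fields=["cluster_name",
#                                                    "master_node"])
#   # submitting this opcode would yield e.g. ["cluster1", "node1.cluster1"]

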
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a tuple of (disks_ok, device_info), where disks_ok is False if
      the operation failed, and device_info is a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


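# Illustrative sketch (not part of the original module): a typical caller of
# _AssembleInstanceDisks checks the boolean first and only then consumes the
# (node, iv_name, device_path) triples, e.g.:
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     lu.proc.LogInfo("%s: %s is visible as %s", node, iv_name, dev_path)

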
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


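# Illustrative sketch (not part of the original module): _ExpandCheckDisks
# with disks=None selects every disk, while an explicit subset must consist
# of the instance's own disk objects, e.g.:
#
#   all_disks = _ExpandCheckDisks(instance, None)       # == instance.disks
#   first_only = _ExpandCheckDisks(instance, instance.disks[:1])
#   # passing a disk of another instance raises errors.ProgrammerError

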
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


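# Illustrative sketch (not part of the original module): callers that must
# not silently lose devices usually check the aggregated result of
# _ShutdownInstanceDisks, e.g.:
#
#   if not _ShutdownInstanceDisks(lu, instance):
#     raise errors.OpExecError("Could not shutdown all block devices")
#   # with ignore_primary=True, a failure on the primary node is tolerated

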
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


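# Illustrative sketch (not part of the original module): a prerequisite check
# before starting an instance whose backend memory is 512 MiB could read:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        512, instance.hypervisor)
#   # raises OpPrereqError with ECODE_NORES if less than 512 MiB is free

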
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


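# Illustrative sketch (not part of the original module; names below are
# hypothetical): req_sizes maps volume group names to the total space needed
# on each of them, e.g.:
#
#   _CheckNodesFreeDiskPerVG(self, [pnode, snode],
#                            {"xenvg": 10240, "datavg": 2048})
#   # checks for 10 GiB free in "xenvg" and 2 GiB in "datavg" on both nodes

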
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs.

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has fewer CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    num_cpus = info.payload.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = \
        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                      self.op.beparams),
                                     self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node,
                                             (inst, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # normalise the disk list
    self.op.disks = sorted(frozenset(self.op.disks))

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    to_skip = []
    mods = [] # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes and minors
          assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6 # otherwise disk internals
                                           # have changed
          (_, _, old_port, _, _, old_secret) = disk.logical_id
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                    new_minors[0], new_minors[1], old_secret)
          assert len(disk.logical_id) == len(new_id)
          mods.append((idx, new_id))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id in mods:
      instance.disks[idx].logical_id = new_id

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (hostname.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUNodeMigrate(LogicalUnit):
7058
  """Migrate all instances from a node.
7059

7060
  """
7061
  HPATH = "node-migrate"
7062
  HTYPE = constants.HTYPE_NODE
7063
  REQ_BGL = False
7064

    
7065
  def CheckArguments(self):
7066
    pass
7067

    
7068
  def ExpandNames(self):
7069
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7070

    
7071
    self.share_locks = _ShareAll()
7072
    self.needed_locks = {
7073
      locking.LEVEL_NODE: [self.op.node_name],
7074
      }
7075

    
7076
  def BuildHooksEnv(self):
7077
    """Build hooks env.
7078

7079
    This runs on the master, the primary and all the secondaries.
7080

7081
    """
7082
    return {
7083
      "NODE_NAME": self.op.node_name,
7084
      }
7085

    
7086
  def BuildHooksNodes(self):
7087
    """Build hooks nodes.
7088

7089
    """
7090
    nl = [self.cfg.GetMasterNode()]
7091
    return (nl, nl)
7092

    
7093
  def CheckPrereq(self):
7094
    pass
7095

    
7096
  def Exec(self, feedback_fn):
7097
    # Prepare jobs for migration instances
7098
    jobs = [
7099
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7100
                                 mode=self.op.mode,
7101
                                 live=self.op.live,
7102
                                 iallocator=self.op.iallocator,
7103
                                 target_node=self.op.target_node)]
7104
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7105
      ]
7106

    
7107
    # TODO: Run iallocator in this opcode and pass correct placement options to
7108
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7109
    # running the iallocator and the actual migration, a good consistency model
7110
    # will have to be found.
7111

    
7112
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7113
            frozenset([self.op.node_name]))
7114

    
7115
    return ResultWithJobs(jobs)
7116

    
7117

    
7118
class TLMigrateInstance(Tasklet):
7119
  """Tasklet class for instance migration.
7120

7121
  @type live: boolean
7122
  @ivar live: whether the migration will be done live or non-live;
7123
      this variable is initialized only after CheckPrereq has run
7124
  @type cleanup: boolean
7125
  @ivar cleanup: Whether we are cleaning up after a failed migration
7126
  @type iallocator: string
7127
  @ivar iallocator: The iallocator used to determine target_node
7128
  @type target_node: string
7129
  @ivar target_node: If given, the target_node to reallocate the instance to
7130
  @type failover: boolean
7131
  @ivar failover: Whether operation results in failover or migration
7132
  @type fallback: boolean
7133
  @ivar fallback: Whether fallback to failover is allowed if migration is not
7134
                  possible
7135
  @type ignore_consistency: boolean
7136
  @ivar ignore_consistency: Whether we should ignore consistency between source
7137
                            and target node
7138
  @type shutdown_timeout: int
7139
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7140

7141
  """
7142

    
7143
  # Constants
7144
  _MIGRATION_POLL_INTERVAL = 1      # seconds
7145
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7146

    
7147
  def __init__(self, lu, instance_name, cleanup=False,
7148
               failover=False, fallback=False,
7149
               ignore_consistency=False,
7150
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7151
    """Initializes this class.
7152

7153
    """
7154
    Tasklet.__init__(self, lu)
7155

    
7156
    # Parameters
7157
    self.instance_name = instance_name
7158
    self.cleanup = cleanup
7159
    self.live = False # will be overridden later
7160
    self.failover = failover
7161
    self.fallback = fallback
7162
    self.ignore_consistency = ignore_consistency
7163
    self.shutdown_timeout = shutdown_timeout
7164

    
7165
  def CheckPrereq(self):
7166
    """Check prerequisites.
7167

7168
    This checks that the instance is in the cluster.
7169

7170
    """
7171
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7172
    instance = self.cfg.GetInstanceInfo(instance_name)
7173
    assert instance is not None
7174
    self.instance = instance
7175

    
7176
    if (not self.cleanup and not instance.admin_up and not self.failover and
7177
        self.fallback):
7178
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7179
                      " to failover")
7180
      self.failover = True
7181

    
7182
    if instance.disk_template not in constants.DTS_MIRRORED:
7183
      if self.failover:
7184
        text = "failovers"
7185
      else:
7186
        text = "migrations"
7187
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7188
                                 " %s" % (instance.disk_template, text),
7189
                                 errors.ECODE_STATE)
7190

    
7191
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7192
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7193

    
7194
      if self.lu.op.iallocator:
7195
        self._RunAllocator()
7196
      else:
7197
        # We set self.target_node as it is required by
7198
        # BuildHooksEnv
7199
        self.target_node = self.lu.op.target_node
7200

    
7201
      # self.target_node is already populated, either directly or by the
7202
      # iallocator run
7203
      target_node = self.target_node
7204
      if self.target_node == instance.primary_node:
7205
        raise errors.OpPrereqError("Cannot migrate instance %s"
7206
                                   " to its primary (%s)" %
7207
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_INVAL)
7208

    
7209
      if len(self.lu.tasklets) == 1:
7210
        # It is safe to release locks only when we're the only tasklet
7211
        # in the LU
7212
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7213
                      keep=[instance.primary_node, self.target_node])
7214

    
7215
    else:
7216
      secondary_nodes = instance.secondary_nodes
7217
      if not secondary_nodes:
7218
        raise errors.ConfigurationError("No secondary node but using"
7219
                                        " %s disk template" %
7220
                                        instance.disk_template)
7221
      target_node = secondary_nodes[0]
7222
      if self.lu.op.iallocator or (self.lu.op.target_node and
7223
                                   self.lu.op.target_node != target_node):
7224
        if self.failover:
7225
          text = "failed over"
7226
        else:
7227
          text = "migrated"
7228
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7229
                                   " be %s to arbitrary nodes"
7230
                                   " (neither an iallocator nor a target"
7231
                                   " node can be passed)" %
7232
                                   (instance.disk_template, text),
7233
                                   errors.ECODE_INVAL)
7234

    
7235
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7236

    
7237
    # check memory requirements on the target node
7238
    if not self.failover or instance.admin_up:
7239
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7240
                           instance.name, i_be[constants.BE_MEMORY],
7241
                           instance.hypervisor)
7242
    else:
7243
      self.lu.LogInfo("Not checking memory on the secondary node as"
7244
                      " instance will not be started")
7245

    
7246
    # check bridge existence
7247
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7248

    
7249
    if not self.cleanup:
7250
      _CheckNodeNotDrained(self.lu, target_node)
7251
      if not self.failover:
7252
        result = self.rpc.call_instance_migratable(instance.primary_node,
7253
                                                   instance)
7254
        if result.fail_msg and self.fallback:
7255
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7256
                          " failover")
7257
          self.failover = True
7258
        else:
7259
          result.Raise("Can't migrate, please use failover",
7260
                       prereq=True, ecode=errors.ECODE_STATE)
7261

    
7262
    assert not (self.failover and self.cleanup)
7263

    
7264
    if not self.failover:
7265
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7266
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7267
                                   " parameters are accepted",
7268
                                   errors.ECODE_INVAL)
7269
      if self.lu.op.live is not None:
7270
        if self.lu.op.live:
7271
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7272
        else:
7273
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7274
        # reset the 'live' parameter to None so that repeated
7275
        # invocations of CheckPrereq do not raise an exception
7276
        self.lu.op.live = None
7277
      elif self.lu.op.mode is None:
7278
        # read the default value from the hypervisor
7279
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7280
                                                skip_globals=False)
7281
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7282

    
7283
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7284
    else:
7285
      # Failover is never live
7286
      self.live = False
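    # In short (comment only, mirroring the logic above): op.live=True maps
    # to HT_MIGRATION_LIVE, op.live=False to HT_MIGRATION_NONLIVE, and if
    # neither 'live' nor 'mode' was given the hypervisor's HV_MIGRATION_MODE
    # default is used; for failovers self.live is always False.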
7287

    
7288
  def _RunAllocator(self):
7289
    """Run the allocator based on input opcode.
7290

7291
    """
7292
    ial = IAllocator(self.cfg, self.rpc,
7293
                     mode=constants.IALLOCATOR_MODE_RELOC,
7294
                     name=self.instance_name,
7295
                     # TODO See why hail breaks with a single node below
7296
                     relocate_from=[self.instance.primary_node,
7297
                                    self.instance.primary_node],
7298
                     )
7299

    
7300
    ial.Run(self.lu.op.iallocator)
7301

    
7302
    if not ial.success:
7303
      raise errors.OpPrereqError("Can't compute nodes using"
7304
                                 " iallocator '%s': %s" %
7305
                                 (self.lu.op.iallocator, ial.info),
7306
                                 errors.ECODE_NORES)
7307
    if len(ial.result) != ial.required_nodes:
7308
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7309
                                 " of nodes (%s), required %s" %
7310
                                 (self.lu.op.iallocator, len(ial.result),
7311
                                  ial.required_nodes), errors.ECODE_FAULT)
7312
    self.target_node = ial.result[0]
7313
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7314
                    self.instance_name, self.lu.op.iallocator,
7315
                    utils.CommaJoin(ial.result))
7316

    
7317
  def _WaitUntilSync(self):
7318
    """Poll with custom rpc for disk sync.
7319

7320
    This uses our own step-based rpc call.
7321

7322
    """
7323
    self.feedback_fn("* wait until resync is done")
7324
    all_done = False
7325
    while not all_done:
7326
      all_done = True
7327
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7328
                                            self.nodes_ip,
7329
                                            self.instance.disks)
7330
      min_percent = 100
7331
      for node, nres in result.items():
7332
        nres.Raise("Cannot resync disks on node %s" % node)
7333
        node_done, node_percent = nres.payload
7334
        all_done = all_done and node_done
7335
        if node_percent is not None:
7336
          min_percent = min(min_percent, node_percent)
7337
      if not all_done:
7338
        if min_percent < 100:
7339
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7340
        time.sleep(2)
7341

    
7342
  def _EnsureSecondary(self, node):
7343
    """Demote a node to secondary.
7344

7345
    """
7346
    self.feedback_fn("* switching node %s to secondary mode" % node)
7347

    
7348
    for dev in self.instance.disks:
7349
      self.cfg.SetDiskID(dev, node)
7350

    
7351
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7352
                                          self.instance.disks)
7353
    result.Raise("Cannot change disk to secondary on node %s" % node)
7354

    
7355
  def _GoStandalone(self):
7356
    """Disconnect from the network.
7357

7358
    """
7359
    self.feedback_fn("* changing into standalone mode")
7360
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7361
                                               self.instance.disks)
7362
    for node, nres in result.items():
7363
      nres.Raise("Cannot disconnect disks node %s" % node)
7364

    
7365
  def _GoReconnect(self, multimaster):
7366
    """Reconnect to the network.
7367

7368
    """
7369
    if multimaster:
7370
      msg = "dual-master"
7371
    else:
7372
      msg = "single-master"
7373
    self.feedback_fn("* changing disks into %s mode" % msg)
7374
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7375
                                           self.instance.disks,
7376
                                           self.instance.name, multimaster)
7377
    for node, nres in result.items():
7378
      nres.Raise("Cannot change disks config on node %s" % node)
7379

    
7380
  def _ExecCleanup(self):
7381
    """Try to cleanup after a failed migration.
7382

7383
    The cleanup is done by:
7384
      - check that the instance is running only on one node
7385
        (and update the config if needed)
7386
      - change disks on its secondary node to secondary
7387
      - wait until disks are fully synchronized
7388
      - disconnect from the network
7389
      - change disks into single-master mode
7390
      - wait again until disks are fully synchronized
7391

7392
    """
7393
    instance = self.instance
7394
    target_node = self.target_node
7395
    source_node = self.source_node
7396

    
7397
    # check running on only one node
7398
    self.feedback_fn("* checking where the instance actually runs"
7399
                     " (if this hangs, the hypervisor might be in"
7400
                     " a bad state)")
7401
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7402
    for node, result in ins_l.items():
7403
      result.Raise("Can't contact node %s" % node)
7404

    
7405
    runningon_source = instance.name in ins_l[source_node].payload
7406
    runningon_target = instance.name in ins_l[target_node].payload
7407

    
7408
    if runningon_source and runningon_target:
7409
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7410
                               " or the hypervisor is confused; you will have"
7411
                               " to ensure manually that it runs only on one"
7412
                               " and restart this operation")
7413

    
7414
    if not (runningon_source or runningon_target):
7415
      raise errors.OpExecError("Instance does not seem to be running at all;"
7416
                               " in this case it's safer to repair by"
7417
                               " running 'gnt-instance stop' to ensure disk"
7418
                               " shutdown, and then restarting it")
7419

    
7420
    if runningon_target:
7421
      # the migration has actually succeeded, we need to update the config
7422
      self.feedback_fn("* instance running on secondary node (%s),"
7423
                       " updating config" % target_node)
7424
      instance.primary_node = target_node
7425
      self.cfg.Update(instance, self.feedback_fn)
7426
      demoted_node = source_node
7427
    else:
7428
      self.feedback_fn("* instance confirmed to be running on its"
7429
                       " primary node (%s)" % source_node)
7430
      demoted_node = target_node
7431

    
7432
    if instance.disk_template in constants.DTS_INT_MIRROR:
7433
      self._EnsureSecondary(demoted_node)
7434
      try:
7435
        self._WaitUntilSync()
7436
      except errors.OpExecError:
7437
        # we ignore here errors, since if the device is standalone, it
7438
        # won't be able to sync
7439
        pass
7440
      self._GoStandalone()
7441
      self._GoReconnect(False)
7442
      self._WaitUntilSync()
7443

    
7444
    self.feedback_fn("* done")
7445

    
7446
  def _RevertDiskStatus(self):
7447
    """Try to revert the disk status after a failed migration.
7448

7449
    """
7450
    target_node = self.target_node
7451
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7452
      return
7453

    
7454
    try:
7455
      self._EnsureSecondary(target_node)
7456
      self._GoStandalone()
7457
      self._GoReconnect(False)
7458
      self._WaitUntilSync()
7459
    except errors.OpExecError, err:
7460
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7461
                         " please try to recover the instance manually;"
7462
                         " error '%s'" % str(err))
7463

    
7464
  def _AbortMigration(self):
7465
    """Call the hypervisor code to abort a started migration.
7466

7467
    """
7468
    instance = self.instance
7469
    target_node = self.target_node
7470
    source_node = self.source_node
7471
    migration_info = self.migration_info
7472

    
7473
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7474
                                                                 instance,
7475
                                                                 migration_info,
7476
                                                                 False)
7477
    abort_msg = abort_result.fail_msg
7478
    if abort_msg:
7479
      logging.error("Aborting migration failed on target node %s: %s",
7480
                    target_node, abort_msg)
7481
      # Don't raise an exception here, as we still have to try to revert the
7482
      # disk status, even if this step failed.
7483

    
7484
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7485
        instance, False, self.live)
7486
    abort_msg = abort_result.fail_msg
7487
    if abort_msg:
7488
      logging.error("Aborting migration failed on source node %s: %s",
7489
                    source_node, abort_msg)
7490

    
7491
  def _ExecMigration(self):
7492
    """Migrate an instance.
7493

7494
    The migrate is done by:
7495
      - change the disks into dual-master mode
7496
      - wait until disks are fully synchronized again
7497
      - migrate the instance
7498
      - change disks on the new secondary node (the old primary) to secondary
7499
      - wait until disks are fully synchronized
7500
      - change disks into single-master mode
7501

7502
    """
7503
    instance = self.instance
7504
    target_node = self.target_node
7505
    source_node = self.source_node
7506

    
7507
    # Check for hypervisor version mismatch and warn the user.
7508
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7509
                                       None, self.instance.hypervisor)
7510
    src_info = nodeinfo[source_node]
7511
    dst_info = nodeinfo[target_node]
7512

    
7513
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7514
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7515
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7516
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7517
      if src_version != dst_version:
7518
        self.feedback_fn("* warning: hypervisor version mismatch between"
7519
                         " source (%s) and target (%s) node" %
7520
                         (src_version, dst_version))
7521

    
7522
    self.feedback_fn("* checking disk consistency between source and target")
7523
    for dev in instance.disks:
7524
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7525
        raise errors.OpExecError("Disk %s is degraded or not fully"
7526
                                 " synchronized on target node,"
7527
                                 " aborting migration" % dev.iv_name)
7528

    
7529
    # First get the migration information from the remote node
7530
    result = self.rpc.call_migration_info(source_node, instance)
7531
    msg = result.fail_msg
7532
    if msg:
7533
      log_err = ("Failed fetching source migration information from %s: %s" %
7534
                 (source_node, msg))
7535
      logging.error(log_err)
7536
      raise errors.OpExecError(log_err)
7537

    
7538
    self.migration_info = migration_info = result.payload
7539

    
7540
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7541
      # Then switch the disks to master/master mode
7542
      self._EnsureSecondary(target_node)
7543
      self._GoStandalone()
7544
      self._GoReconnect(True)
7545
      self._WaitUntilSync()
7546

    
7547
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7548
    result = self.rpc.call_accept_instance(target_node,
7549
                                           instance,
7550
                                           migration_info,
7551
                                           self.nodes_ip[target_node])
7552

    
7553
    msg = result.fail_msg
7554
    if msg:
7555
      logging.error("Instance pre-migration failed, trying to revert"
7556
                    " disk status: %s", msg)
7557
      self.feedback_fn("Pre-migration failed, aborting")
7558
      self._AbortMigration()
7559
      self._RevertDiskStatus()
7560
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7561
                               (instance.name, msg))
7562

    
7563
    self.feedback_fn("* migrating instance to %s" % target_node)
7564
    result = self.rpc.call_instance_migrate(source_node, instance,
7565
                                            self.nodes_ip[target_node],
7566
                                            self.live)
7567
    msg = result.fail_msg
7568
    if msg:
7569
      logging.error("Instance migration failed, trying to revert"
7570
                    " disk status: %s", msg)
7571
      self.feedback_fn("Migration failed, aborting")
7572
      self._AbortMigration()
7573
      self._RevertDiskStatus()
7574
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7575
                               (instance.name, msg))
7576

    
7577
    self.feedback_fn("* starting memory transfer")
7578
    last_feedback = time.time()
7579
    while True:
7580
      result = self.rpc.call_instance_get_migration_status(source_node,
7581
                                                           instance)
7582
      msg = result.fail_msg
7583
      ms = result.payload   # MigrationStatus instance
7584
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7585
        logging.error("Instance migration failed, trying to revert"
7586
                      " disk status: %s", msg)
7587
        self.feedback_fn("Migration failed, aborting")
7588
        self._AbortMigration()
7589
        self._RevertDiskStatus()
7590
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7591
                                 (instance.name, msg))
7592

    
7593
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7594
        self.feedback_fn("* memory transfer complete")
7595
        break
7596

    
7597
      if (utils.TimeoutExpired(last_feedback,
7598
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7599
          ms.transferred_ram is not None):
7600
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7601
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7602
        last_feedback = time.time()
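      # (For example, with 1536 MiB of a 2048 MiB guest transferred, the
      # feedback line above reads "* memory transfer progress: 75.00 %".)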
7603

    
7604
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7605

    
7606
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7607
                                                           instance,
7608
                                                           True,
7609
                                                           self.live)
7610
    msg = result.fail_msg
7611
    if msg:
7612
      logging.error("Instance migration succeeded, but finalization failed"
7613
                    " on the source node: %s", msg)
7614
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7615
                               msg)
7616

    
7617
    instance.primary_node = target_node
7618

    
7619
    # distribute new instance config to the other nodes
7620
    self.cfg.Update(instance, self.feedback_fn)
7621

    
7622
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7623
                                                           instance,
7624
                                                           migration_info,
7625
                                                           True)
7626
    msg = result.fail_msg
7627
    if msg:
7628
      logging.error("Instance migration succeeded, but finalization failed"
7629
                    " on the target node: %s", msg)
7630
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7631
                               msg)
7632

    
7633
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7634
      self._EnsureSecondary(source_node)
7635
      self._WaitUntilSync()
7636
      self._GoStandalone()
7637
      self._GoReconnect(False)
7638
      self._WaitUntilSync()
7639

    
7640
    self.feedback_fn("* done")
7641

    
7642
  def _ExecFailover(self):
7643
    """Failover an instance.
7644

7645
    The failover is done by shutting it down on its present node and
7646
    starting it on the secondary.
7647

7648
    """
7649
    instance = self.instance
7650
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7651

    
7652
    source_node = instance.primary_node
7653
    target_node = self.target_node
7654

    
7655
    if instance.admin_up:
7656
      self.feedback_fn("* checking disk consistency between source and target")
7657
      for dev in instance.disks:
7658
        # for drbd, these are drbd over lvm
7659
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7660
          if primary_node.offline:
7661
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7662
                             " target node %s" %
7663
                             (primary_node.name, dev.iv_name, target_node))
7664
          elif not self.ignore_consistency:
7665
            raise errors.OpExecError("Disk %s is degraded on target node,"
7666
                                     " aborting failover" % dev.iv_name)
7667
    else:
7668
      self.feedback_fn("* not checking disk consistency as instance is not"
7669
                       " running")
7670

    
7671
    self.feedback_fn("* shutting down instance on source node")
7672
    logging.info("Shutting down instance %s on node %s",
7673
                 instance.name, source_node)
7674

    
7675
    result = self.rpc.call_instance_shutdown(source_node, instance,
7676
                                             self.shutdown_timeout)
7677
    msg = result.fail_msg
7678
    if msg:
7679
      if self.ignore_consistency or primary_node.offline:
7680
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7681
                           " proceeding anyway; please make sure node"
7682
                           " %s is down; error details: %s",
7683
                           instance.name, source_node, source_node, msg)
7684
      else:
7685
        raise errors.OpExecError("Could not shutdown instance %s on"
7686
                                 " node %s: %s" %
7687
                                 (instance.name, source_node, msg))
7688

    
7689
    self.feedback_fn("* deactivating the instance's disks on source node")
7690
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7691
      raise errors.OpExecError("Can't shut down the instance's disks")
7692

    
7693
    instance.primary_node = target_node
7694
    # distribute new instance config to the other nodes
7695
    self.cfg.Update(instance, self.feedback_fn)
7696

    
7697
    # Only start the instance if it's marked as up
7698
    if instance.admin_up:
7699
      self.feedback_fn("* activating the instance's disks on target node %s" %
7700
                       target_node)
7701
      logging.info("Starting instance %s on node %s",
7702
                   instance.name, target_node)
7703

    
7704
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7705
                                           ignore_secondaries=True)
7706
      if not disks_ok:
7707
        _ShutdownInstanceDisks(self.lu, instance)
7708
        raise errors.OpExecError("Can't activate the instance's disks")
7709

    
7710
      self.feedback_fn("* starting the instance on the target node %s" %
7711
                       target_node)
7712
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
7713
                                            False)
7714
      msg = result.fail_msg
7715
      if msg:
7716
        _ShutdownInstanceDisks(self.lu, instance)
7717
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7718
                                 (instance.name, target_node, msg))
7719

    
7720
  def Exec(self, feedback_fn):
7721
    """Perform the migration.
7722

7723
    """
7724
    self.feedback_fn = feedback_fn
7725
    self.source_node = self.instance.primary_node
7726

    
7727
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7728
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7729
      self.target_node = self.instance.secondary_nodes[0]
7730
      # Otherwise self.target_node has been populated either
7731
      # directly, or through an iallocator.
7732

    
7733
    self.all_nodes = [self.source_node, self.target_node]
7734
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7735
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7736

    
7737
    if self.failover:
7738
      feedback_fn("Failover instance %s" % self.instance.name)
7739
      self._ExecFailover()
7740
    else:
7741
      feedback_fn("Migrating instance %s" % self.instance.name)
7742

    
7743
      if self.cleanup:
7744
        return self._ExecCleanup()
7745
      else:
7746
        return self._ExecMigration()
7747

    
7748

    
7749
def _CreateBlockDev(lu, node, instance, device, force_create,
7750
                    info, force_open):
7751
  """Create a tree of block devices on a given node.
7752

7753
  If this device type has to be created on secondaries, create it and
7754
  all its children.
7755

7756
  If not, just recurse to children keeping the same 'force' value.
7757

7758
  @param lu: the lu on whose behalf we execute
7759
  @param node: the node on which to create the device
7760
  @type instance: L{objects.Instance}
7761
  @param instance: the instance which owns the device
7762
  @type device: L{objects.Disk}
7763
  @param device: the device to create
7764
  @type force_create: boolean
7765
  @param force_create: whether to force creation of this device; this
7766
      will be changed to True whenever we find a device which has
7767
      CreateOnSecondary() attribute
7768
  @param info: the extra 'metadata' we should attach to the device
7769
      (this will be represented as a LVM tag)
7770
  @type force_open: boolean
7771
  @param force_open: this parameter will be passed to the
7772
      L{backend.BlockdevCreate} function where it specifies
7773
      whether we run on primary or not, and it affects both
7774
      the child assembly and the device own Open() execution
7775

7776
  """
7777
  if device.CreateOnSecondary():
7778
    force_create = True
7779

    
7780
  if device.children:
7781
    for child in device.children:
7782
      _CreateBlockDev(lu, node, instance, child, force_create,
7783
                      info, force_open)
7784

    
7785
  if not force_create:
7786
    return
7787

    
7788
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7789

    
7790

    
7791
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7792
  """Create a single block device on a given node.
7793

7794
  This will not recurse over children of the device, so they must be
7795
  created in advance.
7796

7797
  @param lu: the lu on whose behalf we execute
7798
  @param node: the node on which to create the device
7799
  @type instance: L{objects.Instance}
7800
  @param instance: the instance which owns the device
7801
  @type device: L{objects.Disk}
7802
  @param device: the device to create
7803
  @param info: the extra 'metadata' we should attach to the device
7804
      (this will be represented as a LVM tag)
7805
  @type force_open: boolean
7806
  @param force_open: this parameter will be passes to the
7807
      L{backend.BlockdevCreate} function where it specifies
7808
      whether we run on primary or not, and it affects both
7809
      the child assembly and the device's own Open() execution
7810

7811
  """
7812
  lu.cfg.SetDiskID(device, node)
7813
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7814
                                       instance.name, force_open, info)
7815
  result.Raise("Can't create block device %s on"
7816
               " node %s for instance %s" % (device, node, instance.name))
7817
  if device.physical_id is None:
7818
    device.physical_id = result.payload
7819

    
7820

    
7821
def _GenerateUniqueNames(lu, exts):
7822
  """Generate a suitable LV name.
7823

7824
  This will generate a logical volume name for the given instance.
7825

7826
  """
7827
  results = []
7828
  for val in exts:
7829
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7830
    results.append("%s%s" % (new_id, val))
7831
  return results
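# Illustrative example (comment only): called with suffixes such as
# [".disk0", ".disk1"], the function above returns names of the form
# ["<uuid>.disk0", "<uuid>.disk1"], where <uuid> is a fresh unique ID
# reserved in the configuration for the current execution context.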
7832

    
7833

    
7834
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7835
                         iv_name, p_minor, s_minor):
7836
  """Generate a drbd8 device complete with its children.
7837

7838
  """
7839
  assert len(vgnames) == len(names) == 2
7840
  port = lu.cfg.AllocatePort()
7841
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7842
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7843
                          logical_id=(vgnames[0], names[0]))
7844
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
7845
                          logical_id=(vgnames[1], names[1]))
7846
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7847
                          logical_id=(primary, secondary, port,
7848
                                      p_minor, s_minor,
7849
                                      shared_secret),
7850
                          children=[dev_data, dev_meta],
7851
                          iv_name=iv_name)
7852
  return drbd_dev
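# Illustrative example (comment only): for a 1024 MiB disk the function above
# returns a device tree roughly like
#   LD_DRBD8(size=1024, logical_id=(primary, secondary, port, p_minor,
#                                   s_minor, shared_secret))
#     +- LD_LV(size=1024, logical_id=(vgnames[0], names[0]))            # data
#     +- LD_LV(size=DRBD_META_SIZE, logical_id=(vgnames[1], names[1]))  # meta
# i.e. one data LV of the requested size plus a 128 MiB metadata LV.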
7853

    
7854

    
7855
def _GenerateDiskTemplate(lu, template_name,
7856
                          instance_name, primary_node,
7857
                          secondary_nodes, disk_info,
7858
                          file_storage_dir, file_driver,
7859
                          base_index, feedback_fn):
7860
  """Generate the entire disk layout for a given template type.
7861

7862
  """
7863
  #TODO: compute space requirements
7864

    
7865
  vgname = lu.cfg.GetVGName()
7866
  disk_count = len(disk_info)
7867
  disks = []
7868
  if template_name == constants.DT_DISKLESS:
7869
    pass
7870
  elif template_name == constants.DT_PLAIN:
7871
    if len(secondary_nodes) != 0:
7872
      raise errors.ProgrammerError("Wrong template configuration")
7873

    
7874
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7875
                                      for i in range(disk_count)])
7876
    for idx, disk in enumerate(disk_info):
7877
      disk_index = idx + base_index
7878
      vg = disk.get(constants.IDISK_VG, vgname)
7879
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7880
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7881
                              size=disk[constants.IDISK_SIZE],
7882
                              logical_id=(vg, names[idx]),
7883
                              iv_name="disk/%d" % disk_index,
7884
                              mode=disk[constants.IDISK_MODE])
7885
      disks.append(disk_dev)
7886
  elif template_name == constants.DT_DRBD8:
7887
    if len(secondary_nodes) != 1:
7888
      raise errors.ProgrammerError("Wrong template configuration")
7889
    remote_node = secondary_nodes[0]
7890
    minors = lu.cfg.AllocateDRBDMinor(
7891
      [primary_node, remote_node] * len(disk_info), instance_name)
7892

    
7893
    names = []
7894
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7895
                                               for i in range(disk_count)]):
7896
      names.append(lv_prefix + "_data")
7897
      names.append(lv_prefix + "_meta")
7898
    for idx, disk in enumerate(disk_info):
7899
      disk_index = idx + base_index
7900
      data_vg = disk.get(constants.IDISK_VG, vgname)
7901
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7902
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7903
                                      disk[constants.IDISK_SIZE],
7904
                                      [data_vg, meta_vg],
7905
                                      names[idx * 2:idx * 2 + 2],
7906
                                      "disk/%d" % disk_index,
7907
                                      minors[idx * 2], minors[idx * 2 + 1])
7908
      disk_dev.mode = disk[constants.IDISK_MODE]
7909
      disks.append(disk_dev)
7910
  elif template_name == constants.DT_FILE:
7911
    if len(secondary_nodes) != 0:
7912
      raise errors.ProgrammerError("Wrong template configuration")
7913

    
7914
    opcodes.RequireFileStorage()
7915

    
7916
    for idx, disk in enumerate(disk_info):
7917
      disk_index = idx + base_index
7918
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7919
                              size=disk[constants.IDISK_SIZE],
7920
                              iv_name="disk/%d" % disk_index,
7921
                              logical_id=(file_driver,
7922
                                          "%s/disk%d" % (file_storage_dir,
7923
                                                         disk_index)),
7924
                              mode=disk[constants.IDISK_MODE])
7925
      disks.append(disk_dev)
7926
  elif template_name == constants.DT_SHARED_FILE:
7927
    if len(secondary_nodes) != 0:
7928
      raise errors.ProgrammerError("Wrong template configuration")
7929

    
7930
    opcodes.RequireSharedFileStorage()
7931

    
7932
    for idx, disk in enumerate(disk_info):
7933
      disk_index = idx + base_index
7934
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7935
                              size=disk[constants.IDISK_SIZE],
7936
                              iv_name="disk/%d" % disk_index,
7937
                              logical_id=(file_driver,
7938
                                          "%s/disk%d" % (file_storage_dir,
7939
                                                         disk_index)),
7940
                              mode=disk[constants.IDISK_MODE])
7941
      disks.append(disk_dev)
7942
  elif template_name == constants.DT_BLOCK:
7943
    if len(secondary_nodes) != 0:
7944
      raise errors.ProgrammerError("Wrong template configuration")
7945

    
7946
    for idx, disk in enumerate(disk_info):
7947
      disk_index = idx + base_index
7948
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7949
                              size=disk[constants.IDISK_SIZE],
7950
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7951
                                          disk[constants.IDISK_ADOPT]),
7952
                              iv_name="disk/%d" % disk_index,
7953
                              mode=disk[constants.IDISK_MODE])
7954
      disks.append(disk_dev)
7955

    
7956
  else:
7957
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7958
  return disks
7959

    
7960

    
7961
def _GetInstanceInfoText(instance):
7962
  """Compute that text that should be added to the disk's metadata.
7963

7964
  """
7965
  return "originstname+%s" % instance.name
7966

    
7967

    
7968
def _CalcEta(time_taken, written, total_size):
7969
  """Calculates the ETA based on size written and total size.
7970

7971
  @param time_taken: The time taken so far
7972
  @param written: amount written so far
7973
  @param total_size: The total size of data to be written
7974
  @return: The remaining time in seconds
7975

7976
  """
7977
  avg_time = time_taken / float(written)
7978
  return (total_size - written) * avg_time
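# Worked example (comment only): if 1024 MiB of a 4096 MiB disk were written
# in 30 seconds, then _CalcEta(30.0, 1024, 4096) == (4096 - 1024) * (30.0 /
# 1024) == 90.0, i.e. roughly another 90 seconds are expected.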
7979

    
7980

    
7981
def _WipeDisks(lu, instance):
7982
  """Wipes instance disks.
7983

7984
  @type lu: L{LogicalUnit}
7985
  @param lu: the logical unit on whose behalf we execute
7986
  @type instance: L{objects.Instance}
7987
  @param instance: the instance whose disks we should create
7988
  @return: the success of the wipe
7989

7990
  """
7991
  node = instance.primary_node
7992

    
7993
  for device in instance.disks:
7994
    lu.cfg.SetDiskID(device, node)
7995

    
7996
  logging.info("Pause sync of instance %s disks", instance.name)
7997
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7998

    
7999
  for idx, success in enumerate(result.payload):
8000
    if not success:
8001
      logging.warn("pause-sync of instance %s for disks %d failed",
8002
                   instance.name, idx)
8003

    
8004
  try:
8005
    for idx, device in enumerate(instance.disks):
8006
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
8007
      # but at most MAX_WIPE_CHUNK
8008
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8009
                            constants.MIN_WIPE_CHUNK_PERCENT)
8010
      # we _must_ make this an int, otherwise rounding errors will
8011
      # occur
8012
      wipe_chunk_size = int(wipe_chunk_size)
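      # Example, assuming the usual defaults of MIN_WIPE_CHUNK_PERCENT = 10
      # and MAX_WIPE_CHUNK = 1024 MiB: a 5120 MiB disk is wiped in 512 MiB
      # chunks, while a 20480 MiB disk is capped at 1024 MiB per chunk.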
8013

    
8014
      lu.LogInfo("* Wiping disk %d", idx)
8015
      logging.info("Wiping disk %d for instance %s, node %s using"
8016
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8017

    
8018
      offset = 0
8019
      size = device.size
8020
      last_output = 0
8021
      start_time = time.time()
8022

    
8023
      while offset < size:
8024
        wipe_size = min(wipe_chunk_size, size - offset)
8025
        logging.debug("Wiping disk %d, offset %s, chunk %s",
8026
                      idx, offset, wipe_size)
8027
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8028
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
8029
                     (idx, offset, wipe_size))
8030
        now = time.time()
8031
        offset += wipe_size
8032
        if now - last_output >= 60:
8033
          eta = _CalcEta(now - start_time, offset, size)
8034
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
8035
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
8036
          last_output = now
8037
  finally:
8038
    logging.info("Resume sync of instance %s disks", instance.name)
8039

    
8040
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8041

    
8042
    for idx, success in enumerate(result.payload):
8043
      if not success:
8044
        lu.LogWarning("Resume sync of disk %d failed, please have a"
8045
                      " look at the status and troubleshoot the issue", idx)
8046
        logging.warn("resume-sync of instance %s for disks %d failed",
8047
                     instance.name, idx)
8048

    
8049

    
8050
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8051
  """Create all disks for an instance.
8052

8053
  This abstracts away some work from AddInstance.
8054

8055
  @type lu: L{LogicalUnit}
8056
  @param lu: the logical unit on whose behalf we execute
8057
  @type instance: L{objects.Instance}
8058
  @param instance: the instance whose disks we should create
8059
  @type to_skip: list
8060
  @param to_skip: list of indices to skip
8061
  @type target_node: string
8062
  @param target_node: if passed, overrides the target node for creation
8063
  @rtype: boolean
8064
  @return: the success of the creation
8065

8066
  """
8067
  info = _GetInstanceInfoText(instance)
8068
  if target_node is None:
8069
    pnode = instance.primary_node
8070
    all_nodes = instance.all_nodes
8071
  else:
8072
    pnode = target_node
8073
    all_nodes = [pnode]
8074

    
8075
  if instance.disk_template in constants.DTS_FILEBASED:
8076
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8077
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8078

    
8079
    result.Raise("Failed to create directory '%s' on"
8080
                 " node %s" % (file_storage_dir, pnode))
8081

    
8082
  # Note: this needs to be kept in sync with adding of disks in
8083
  # LUInstanceSetParams
8084
  for idx, device in enumerate(instance.disks):
8085
    if to_skip and idx in to_skip:
8086
      continue
8087
    logging.info("Creating volume %s for instance %s",
8088
                 device.iv_name, instance.name)
8089
    #HARDCODE
8090
    for node in all_nodes:
8091
      f_create = node == pnode
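      # Note: only the primary node gets force_create/force_open set here; on
      # the other nodes _CreateBlockDev still creates devices whose
      # CreateOnSecondary() is true (e.g. DRBD disks and their component LVs),
      # but without forcing them open.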
8092
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8093

    
8094

    
8095
def _RemoveDisks(lu, instance, target_node=None):
8096
  """Remove all disks for an instance.
8097

8098
  This abstracts away some work from `AddInstance()` and
8099
  `RemoveInstance()`. Note that in case some of the devices couldn't
8100
  be removed, the removal will continue with the other ones (compare
8101
  with `_CreateDisks()`).
8102

8103
  @type lu: L{LogicalUnit}
8104
  @param lu: the logical unit on whose behalf we execute
8105
  @type instance: L{objects.Instance}
8106
  @param instance: the instance whose disks we should remove
8107
  @type target_node: string
8108
  @param target_node: used to override the node on which to remove the disks
8109
  @rtype: boolean
8110
  @return: the success of the removal
8111

8112
  """
8113
  logging.info("Removing block devices for instance %s", instance.name)
8114

    
8115
  all_result = True
8116
  for device in instance.disks:
8117
    if target_node:
8118
      edata = [(target_node, device)]
8119
    else:
8120
      edata = device.ComputeNodeTree(instance.primary_node)
8121
    for node, disk in edata:
8122
      lu.cfg.SetDiskID(disk, node)
8123
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8124
      if msg:
8125
        lu.LogWarning("Could not remove block device %s on node %s,"
8126
                      " continuing anyway: %s", device.iv_name, node, msg)
8127
        all_result = False
8128

    
8129
  if instance.disk_template == constants.DT_FILE:
8130
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8131
    if target_node:
8132
      tgt = target_node
8133
    else:
8134
      tgt = instance.primary_node
8135
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8136
    if result.fail_msg:
8137
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8138
                    file_storage_dir, tgt, result.fail_msg)
8139
      all_result = False
8140

    
8141
  return all_result
8142

    
8143

    
8144
def _ComputeDiskSizePerVG(disk_template, disks):
8145
  """Compute disk size requirements in the volume group
8146

8147
  """
8148
  def _compute(disks, payload):
8149
    """Universal algorithm.
8150

8151
    """
8152
    vgs = {}
8153
    for disk in disks:
8154
      vgs[disk[constants.IDISK_VG]] = \
8155
        vgs.get(disk[constants.IDISK_VG], 0) + \
          disk[constants.IDISK_SIZE] + payload
8156

    
8157
    return vgs
8158

    
8159
  # Required free disk space as a function of disk and swap space
8160
  req_size_dict = {
8161
    constants.DT_DISKLESS: {},
8162
    constants.DT_PLAIN: _compute(disks, 0),
8163
    # 128 MB are added for drbd metadata for each disk
8164
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8165
    constants.DT_FILE: {},
8166
    constants.DT_SHARED_FILE: {},
8167
  }
8168

    
8169
  if disk_template not in req_size_dict:
8170
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8171
                                 " is unknown" % disk_template)
8172

    
8173
  return req_size_dict[disk_template]
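# Illustrative example (comment only): for two DRBD8 disks of 1024 and
# 2048 MiB, both in volume group "xenvg", the result is
# {"xenvg": (1024 + 128) + (2048 + 128)} == {"xenvg": 3328}, since each DRBD
# disk also needs DRBD_META_SIZE (128 MiB) of metadata space.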
8174

    
8175

    
8176
def _ComputeDiskSize(disk_template, disks):
8177
  """Compute disk size requirements in the volume group
8178

8179
  """
8180
  # Required free disk space as a function of disk and swap space
8181
  req_size_dict = {
8182
    constants.DT_DISKLESS: None,
8183
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8184
    # 128 MB are added for drbd metadata for each disk
8185
    constants.DT_DRBD8:
8186
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8187
    constants.DT_FILE: None,
8188
    constants.DT_SHARED_FILE: 0,
8189
    constants.DT_BLOCK: 0,
8190
  }
8191

    
8192
  if disk_template not in req_size_dict:
8193
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8194
                                 " is unknown" % disk_template)
8195

    
8196
  return req_size_dict[disk_template]
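# Illustrative example (comment only): for two disks of 1024 and 2048 MiB,
# _ComputeDiskSize returns 3072 for DT_PLAIN and 1024 + 128 + 2048 + 128 ==
# 3328 for DT_DRBD8, while file-based, block and diskless templates need no
# volume group space at all (0 or None).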
8197

    
8198

    
8199
def _FilterVmNodes(lu, nodenames):
8200
  """Filters out non-vm_capable nodes from a list.
8201

8202
  @type lu: L{LogicalUnit}
8203
  @param lu: the logical unit for which we check
8204
  @type nodenames: list
8205
  @param nodenames: the list of nodes on which we should check
8206
  @rtype: list
8207
  @return: the list of vm-capable nodes
8208

8209
  """
8210
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8211
  return [name for name in nodenames if name not in non_vm_nodes]
8212

    
8213

    
8214
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8215
  """Hypervisor parameter validation.
8216

8217
  This function abstracts the hypervisor parameter validation to be
8218
  used in both instance create and instance modify.
8219

8220
  @type lu: L{LogicalUnit}
8221
  @param lu: the logical unit for which we check
8222
  @type nodenames: list
8223
  @param nodenames: the list of nodes on which we should check
8224
  @type hvname: string
8225
  @param hvname: the name of the hypervisor we should use
8226
  @type hvparams: dict
8227
  @param hvparams: the parameters which we need to check
8228
  @raise errors.OpPrereqError: if the parameters are not valid
8229

8230
  """
8231
  nodenames = _FilterVmNodes(lu, nodenames)
8232

    
8233
  cluster = lu.cfg.GetClusterInfo()
8234
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8235

    
8236
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8237
  for node in nodenames:
8238
    info = hvinfo[node]
8239
    if info.offline:
8240
      continue
8241
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8242

    
8243

    
8244
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8245
  """OS parameters validation.
8246

8247
  @type lu: L{LogicalUnit}
8248
  @param lu: the logical unit for which we check
8249
  @type required: boolean
8250
  @param required: whether the validation should fail if the OS is not
8251
      found
8252
  @type nodenames: list
8253
  @param nodenames: the list of nodes on which we should check
8254
  @type osname: string
8255
  @param osname: the name of the OS we should use
8256
  @type osparams: dict
8257
  @param osparams: the parameters which we need to check
8258
  @raise errors.OpPrereqError: if the parameters are not valid
8259

8260
  """
8261
  nodenames = _FilterVmNodes(lu, nodenames)
8262
  result = lu.rpc.call_os_validate(nodenames, required, osname,
8263
                                   [constants.OS_VALIDATE_PARAMETERS],
8264
                                   osparams)
8265
  for node, nres in result.items():
8266
    # we don't check for offline cases since this should be run only
8267
    # against the master node and/or an instance's nodes
8268
    nres.Raise("OS Parameters validation failed on node %s" % node)
8269
    if not nres.payload:
8270
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8271
                 osname, node)
8272

    
8273

    
8274
class LUInstanceCreate(LogicalUnit):
8275
  """Create an instance.
8276

8277
  """
8278
  HPATH = "instance-add"
8279
  HTYPE = constants.HTYPE_INSTANCE
8280
  REQ_BGL = False
8281

    
8282
  def CheckArguments(self):
8283
    """Check arguments.
8284

8285
    """
8286
    # do not require name_check to ease forward/backward compatibility
8287
    # for tools
8288
    if self.op.no_install and self.op.start:
8289
      self.LogInfo("No-installation mode selected, disabling startup")
8290
      self.op.start = False
8291
    # validate/normalize the instance name
8292
    self.op.instance_name = \
8293
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8294

    
8295
    if self.op.ip_check and not self.op.name_check:
8296
      # TODO: make the ip check more flexible and not depend on the name check
8297
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8298
                                 " check", errors.ECODE_INVAL)
8299

    
8300
    # check nics' parameter names
8301
    for nic in self.op.nics:
8302
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8303

    
8304
    # check disks: parameter names and consistent adopt/no-adopt strategy
8305
    has_adopt = has_no_adopt = False
8306
    for disk in self.op.disks:
8307
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8308
      if constants.IDISK_ADOPT in disk:
8309
        has_adopt = True
8310
      else:
8311
        has_no_adopt = True
8312
    if has_adopt and has_no_adopt:
8313
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8314
                                 errors.ECODE_INVAL)
8315
    if has_adopt:
8316
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8317
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8318
                                   " '%s' disk template" %
8319
                                   self.op.disk_template,
8320
                                   errors.ECODE_INVAL)
8321
      if self.op.iallocator is not None:
8322
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8323
                                   " iallocator script", errors.ECODE_INVAL)
8324
      if self.op.mode == constants.INSTANCE_IMPORT:
8325
        raise errors.OpPrereqError("Disk adoption not allowed for"
8326
                                   " instance import", errors.ECODE_INVAL)
8327
    else:
8328
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8329
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8330
                                   " but no 'adopt' parameter given" %
8331
                                   self.op.disk_template,
8332
                                   errors.ECODE_INVAL)
8333

    
8334
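    # Remember the adoption decision: CheckPrereq and Exec branch on this
    # flag to reuse existing volumes instead of creating new disks.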
    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

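  # When an iallocator is requested, ExpandNames leaves pnode/snode unset and
  # locks all nodes; _RunAllocator fills in the nodes during CheckPrereq.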
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

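  # Only called when identify_defaults is set: any value that matches the
  # cluster default is dropped, so the instance keeps just its overrides.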
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

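  # CheckPrereq runs with the needed locks already held and validates the
  # request against live cluster state before Exec makes any changes.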
  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
    for param, value in self.op.beparams.iteritems():
      if value == constants.VALUE_AUTO:
        self.op.beparams[param] = default_beparams[param]
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      if link == constants.VALUE_AUTO:
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

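    # For disk adoption the sizes requested in the opcode are overwritten
    # below with the sizes of the volumes/devices found on the primary node.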
    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)
9127

    
9128
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9129

    
9130
    # Declare that we don't want to remove the instance lock anymore, as we've
9131
    # added the instance to the config
9132
    del self.remove_locks[locking.LEVEL_INSTANCE]
9133

    
9134
    if self.op.mode == constants.INSTANCE_IMPORT:
9135
      # Release unused nodes
9136
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9137
    else:
9138
      # Release all nodes
9139
      _ReleaseLocks(self, locking.LEVEL_NODE)
9140

    
9141
    disk_abort = False
9142
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9143
      feedback_fn("* wiping instance disks...")
9144
      try:
9145
        _WipeDisks(self, iobj)
9146
      except errors.OpExecError, err:
9147
        logging.exception("Wiping disks failed")
9148
        self.LogWarning("Wiping instance disks failed (%s)", err)
9149
        disk_abort = True
9150

    
9151
    if disk_abort:
9152
      # Something is already wrong with the disks, don't do anything else
9153
      pass
9154
    elif self.op.wait_for_sync:
9155
      disk_abort = not _WaitForSync(self, iobj)
9156
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9157
      # make sure the disks are not degraded (still sync-ing is ok)
9158
      feedback_fn("* checking mirrors status")
9159
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9160
    else:
9161
      disk_abort = False
9162

    
9163
    if disk_abort:
9164
      _RemoveDisks(self, iobj)
9165
      self.cfg.RemoveInstance(iobj.name)
9166
      # Make sure the instance lock gets removed
9167
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9168
      raise errors.OpExecError("There are some degraded disks for"
9169
                               " this instance")
9170

    
9171
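    # OS installation/import only makes sense for instances with freshly
    # created disks; diskless and adopted instances are left untouched here.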
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


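# NoHooksLU: fetching console information is a read-only operation, so no
# hooks environment is built for it.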
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)


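# The LU above only handles locking and hooks; the actual disk replacement
# logic lives in this tasklet.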
class TLReplaceDisks(Tasklet):
9453
  """Replaces disks for an instance.
9454

9455
  Note: Locking is not within the scope of this class.
9456

9457
  """
9458
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9459
               disks, delay_iallocator, early_release):
9460
    """Initializes this class.
9461

9462
    """
9463
    Tasklet.__init__(self, lu)
9464

    
9465
    # Parameters
9466
    self.instance_name = instance_name
9467
    self.mode = mode
9468
    self.iallocator_name = iallocator_name
9469
    self.remote_node = remote_node
9470
    self.disks = disks
9471
    self.delay_iallocator = delay_iallocator
9472
    self.early_release = early_release
9473

    
9474
    # Runtime data
9475
    self.instance = None
9476
    self.new_node = None
9477
    self.target_node = None
9478
    self.other_node = None
9479
    self.remote_node_info = None
9480
    self.node_secondary_ip = None
9481

    
9482
  @staticmethod
9483
  def CheckArguments(mode, remote_node, iallocator):
9484
    """Helper function for users of this class.
9485

9486
    """
9487
    # check for valid parameter combination
9488
    if mode == constants.REPLACE_DISK_CHG:
9489
      if remote_node is None and iallocator is None:
9490
        raise errors.OpPrereqError("When changing the secondary either an"
9491
                                   " iallocator script must be used or the"
9492
                                   " new node given", errors.ECODE_INVAL)
9493

    
9494
      if remote_node is not None and iallocator is not None:
9495
        raise errors.OpPrereqError("Give either the iallocator or the new"
9496
                                   " secondary, not both", errors.ECODE_INVAL)
9497

    
9498
    elif remote_node is not None or iallocator is not None:
9499
      # Not replacing the secondary
9500
      raise errors.OpPrereqError("The iallocator and new node options can"
9501
                                 " only be used when changing the"
9502
                                 " secondary node", errors.ECODE_INVAL)
9503

    
9504
  @staticmethod
9505
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9506
    """Compute a new secondary node using an IAllocator.
9507

9508
    """
9509
    ial = IAllocator(lu.cfg, lu.rpc,
9510
                     mode=constants.IALLOCATOR_MODE_RELOC,
9511
                     name=instance_name,
9512
                     relocate_from=list(relocate_from))
9513

    
9514
    ial.Run(iallocator_name)
9515

    
9516
    if not ial.success:
9517
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9518
                                 " %s" % (iallocator_name, ial.info),
9519
                                 errors.ECODE_NORES)
9520

    
9521
    if len(ial.result) != ial.required_nodes:
9522
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9523
                                 " of nodes (%s), required %s" %
9524
                                 (iallocator_name,
9525
                                  len(ial.result), ial.required_nodes),
9526
                                 errors.ECODE_FAULT)
9527

    
9528
    remote_node_name = ial.result[0]
9529

    
9530
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9531
               instance_name, remote_node_name)
9532

    
9533
    return remote_node_name
9534

    
9535
  def _FindFaultyDisks(self, node_name):
9536
    """Wrapper for L{_FindFaultyInstanceDisks}.
9537

9538
    """
9539
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9540
                                    node_name, True)
9541

    
9542
  def _CheckDisksActivated(self, instance):
9543
    """Checks if the instance disks are activated.
9544

9545
    @param instance: The instance to check disks
9546
    @return: True if they are activated, False otherwise
9547

9548
    """
9549
    nodes = instance.all_nodes
9550

    
9551
    for idx, dev in enumerate(instance.disks):
9552
      for node in nodes:
9553
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9554
        self.cfg.SetDiskID(dev, node)
9555

    
9556
        result = self.rpc.call_blockdev_find(node, dev)
9557

    
9558
        if result.offline:
9559
          continue
9560
        elif result.fail_msg or not result.payload:
9561
          return False
9562

    
9563
    return True
9564

    
9565
  def CheckPrereq(self):
9566
    """Check prerequisites.
9567

9568
    This checks that the instance is in the cluster.
9569

9570
    """
9571
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9572
    assert instance is not None, \
9573
      "Cannot retrieve locked instance %s" % self.instance_name
9574

    
9575
    if instance.disk_template != constants.DT_DRBD8:
9576
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9577
                                 " instances", errors.ECODE_INVAL)
9578

    
9579
    if len(instance.secondary_nodes) != 1:
9580
      raise errors.OpPrereqError("The instance has a strange layout,"
9581
                                 " expected one secondary but found %d" %
9582
                                 len(instance.secondary_nodes),
9583
                                 errors.ECODE_FAULT)
9584

    
9585
    if not self.delay_iallocator:
9586
      self._CheckPrereq2()
9587

    
9588
  def _CheckPrereq2(self):
9589
    """Check prerequisites, second part.
9590

9591
    This function should always be part of CheckPrereq. It was separated out
    and is now called from Exec because, during node evacuation, the
    iallocator was otherwise only called with an unmodified cluster model,
    not taking planned changes into account.
9595

9596
    """
9597
    instance = self.instance
9598
    secondary_node = instance.secondary_nodes[0]
9599

    
9600
    if self.iallocator_name is None:
9601
      remote_node = self.remote_node
9602
    else:
9603
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9604
                                       instance.name, instance.secondary_nodes)
9605

    
9606
    if remote_node is None:
9607
      self.remote_node_info = None
9608
    else:
9609
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9610
             "Remote node '%s' is not locked" % remote_node
9611

    
9612
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9613
      assert self.remote_node_info is not None, \
9614
        "Cannot retrieve locked node %s" % remote_node
9615

    
9616
    if remote_node == self.instance.primary_node:
9617
      raise errors.OpPrereqError("The specified node is the primary node of"
9618
                                 " the instance", errors.ECODE_INVAL)
9619

    
9620
    if remote_node == secondary_node:
9621
      raise errors.OpPrereqError("The specified node is already the"
9622
                                 " secondary node of the instance",
9623
                                 errors.ECODE_INVAL)
9624

    
9625
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9626
                                    constants.REPLACE_DISK_CHG):
9627
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9628
                                 errors.ECODE_INVAL)
9629

    
9630
    if self.mode == constants.REPLACE_DISK_AUTO:
9631
      if not self._CheckDisksActivated(instance):
9632
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9633
                                   " first" % self.instance_name,
9634
                                   errors.ECODE_STATE)
9635
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9636
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9637

    
9638
      if faulty_primary and faulty_secondary:
9639
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9640
                                   " one node and can not be repaired"
9641
                                   " automatically" % self.instance_name,
9642
                                   errors.ECODE_STATE)
9643

    
9644
      if faulty_primary:
9645
        self.disks = faulty_primary
9646
        self.target_node = instance.primary_node
9647
        self.other_node = secondary_node
9648
        check_nodes = [self.target_node, self.other_node]
9649
      elif faulty_secondary:
9650
        self.disks = faulty_secondary
9651
        self.target_node = secondary_node
9652
        self.other_node = instance.primary_node
9653
        check_nodes = [self.target_node, self.other_node]
9654
      else:
9655
        self.disks = []
9656
        check_nodes = []
9657

    
9658
    else:
9659
      # Non-automatic modes
9660
      if self.mode == constants.REPLACE_DISK_PRI:
9661
        self.target_node = instance.primary_node
9662
        self.other_node = secondary_node
9663
        check_nodes = [self.target_node, self.other_node]
9664

    
9665
      elif self.mode == constants.REPLACE_DISK_SEC:
9666
        self.target_node = secondary_node
9667
        self.other_node = instance.primary_node
9668
        check_nodes = [self.target_node, self.other_node]
9669

    
9670
      elif self.mode == constants.REPLACE_DISK_CHG:
9671
        self.new_node = remote_node
9672
        self.other_node = instance.primary_node
9673
        self.target_node = secondary_node
9674
        check_nodes = [self.new_node, self.other_node]
9675

    
9676
        _CheckNodeNotDrained(self.lu, remote_node)
9677
        _CheckNodeVmCapable(self.lu, remote_node)
9678

    
9679
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9680
        assert old_node_info is not None
9681
        if old_node_info.offline and not self.early_release:
9682
          # doesn't make sense to delay the release
9683
          self.early_release = True
9684
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9685
                          " early-release mode", secondary_node)
9686

    
9687
      else:
9688
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9689
                                     self.mode)
9690

    
9691
      # If not specified all disks should be replaced
9692
      if not self.disks:
9693
        self.disks = range(len(self.instance.disks))
9694

    
9695
    for node in check_nodes:
9696
      _CheckNodeOnline(self.lu, node)
9697

    
9698
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9699
                                                          self.other_node,
9700
                                                          self.target_node]
9701
                              if node_name is not None)
9702

    
9703
    # Release unneeded node locks
9704
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9705

    
9706
    # Release any owned node group
9707
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9708
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9709

    
9710
    # Check whether disks are valid
9711
    for disk_idx in self.disks:
9712
      instance.FindDisk(disk_idx)
9713

    
9714
    # Get secondary node IP addresses
9715
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9716
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9717

    
9718
  def Exec(self, feedback_fn):
9719
    """Execute disk replacement.
9720

9721
    This dispatches the disk replacement to the appropriate handler.
9722

9723
    """
9724
    if self.delay_iallocator:
9725
      self._CheckPrereq2()
9726

    
9727
    if __debug__:
9728
      # Verify owned locks before starting operation
9729
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9730
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9731
          ("Incorrect node locks, owning %s, expected %s" %
9732
           (owned_nodes, self.node_secondary_ip.keys()))
9733

    
9734
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9735
      assert list(owned_instances) == [self.instance_name], \
9736
          "Instance '%s' not locked" % self.instance_name
9737

    
9738
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9739
          "Should not own any node group lock at this point"
9740

    
9741
    if not self.disks:
9742
      feedback_fn("No disks need replacement")
9743
      return
9744

    
9745
    feedback_fn("Replacing disk(s) %s for %s" %
9746
                (utils.CommaJoin(self.disks), self.instance.name))
9747

    
9748
    activate_disks = (not self.instance.admin_up)
9749

    
9750
    # Activate the instance disks if we're replacing them on a down instance
9751
    if activate_disks:
9752
      _StartInstanceDisks(self.lu, self.instance, True)
9753

    
9754
    try:
9755
      # Should we replace the secondary node?
9756
      if self.new_node is not None:
9757
        fn = self._ExecDrbd8Secondary
9758
      else:
9759
        fn = self._ExecDrbd8DiskOnly
9760

    
9761
      result = fn(feedback_fn)
9762
    finally:
9763
      # Deactivate the instance disks if we're replacing them on a
9764
      # down instance
9765
      if activate_disks:
9766
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9767

    
9768
    if __debug__:
9769
      # Verify owned locks
9770
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9771
      nodes = frozenset(self.node_secondary_ip)
9772
      assert ((self.early_release and not owned_nodes) or
9773
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9774
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9775
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9776

    
9777
    return result
9778

    
9779
  def _CheckVolumeGroup(self, nodes):
9780
    self.lu.LogInfo("Checking volume groups")
9781

    
9782
    vgname = self.cfg.GetVGName()
9783

    
9784
    # Make sure volume group exists on all involved nodes
9785
    results = self.rpc.call_vg_list(nodes)
9786
    if not results:
9787
      raise errors.OpExecError("Can't list volume groups on the nodes")
9788

    
9789
    for node in nodes:
9790
      res = results[node]
9791
      res.Raise("Error checking node %s" % node)
9792
      if vgname not in res.payload:
9793
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9794
                                 (vgname, node))
9795

    
9796
  def _CheckDisksExistence(self, nodes):
9797
    # Check disk existence
9798
    for idx, dev in enumerate(self.instance.disks):
9799
      if idx not in self.disks:
9800
        continue
9801

    
9802
      for node in nodes:
9803
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9804
        self.cfg.SetDiskID(dev, node)
9805

    
9806
        result = self.rpc.call_blockdev_find(node, dev)
9807

    
9808
        msg = result.fail_msg
9809
        if msg or not result.payload:
9810
          if not msg:
9811
            msg = "disk not found"
9812
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9813
                                   (idx, node, msg))
9814

    
9815
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9816
    for idx, dev in enumerate(self.instance.disks):
9817
      if idx not in self.disks:
9818
        continue
9819

    
9820
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9821
                      (idx, node_name))
9822

    
9823
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9824
                                   ldisk=ldisk):
9825
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9826
                                 " replace disks for instance %s" %
9827
                                 (node_name, self.instance.name))
9828

    
9829
  def _CreateNewStorage(self, node_name):
9830
    """Create new storage on the primary or secondary node.
9831

9832
    This is only used for same-node replaces, not for changing the
9833
    secondary node, hence we don't want to modify the existing disk.
9834

9835
    """
9836
    iv_names = {}
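    # Maps each DRBD device's iv_name (e.g. "disk/0") to a tuple of
    # (drbd disk object, old LV children, newly created LVs); this is later
    # consumed by _CheckDevices and _RemoveOldStorage.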
9837

    
9838
    for idx, dev in enumerate(self.instance.disks):
9839
      if idx not in self.disks:
9840
        continue
9841

    
9842
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9843

    
9844
      self.cfg.SetDiskID(dev, node_name)
9845

    
9846
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9847
      names = _GenerateUniqueNames(self.lu, lv_names)
9848

    
9849
      vg_data = dev.children[0].logical_id[0]
9850
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9851
                             logical_id=(vg_data, names[0]))
9852
      vg_meta = dev.children[1].logical_id[0]
9853
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
9854
                             logical_id=(vg_meta, names[1]))
9855

    
9856
      new_lvs = [lv_data, lv_meta]
9857
      old_lvs = [child.Copy() for child in dev.children]
9858
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9859

    
9860
      # we pass force_create=True to force the LVM creation
9861
      for new_lv in new_lvs:
9862
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9863
                        _GetInstanceInfoText(self.instance), False)
9864

    
9865
    return iv_names
9866

    
9867
  def _CheckDevices(self, node_name, iv_names):
9868
    for name, (dev, _, _) in iv_names.iteritems():
9869
      self.cfg.SetDiskID(dev, node_name)
9870

    
9871
      result = self.rpc.call_blockdev_find(node_name, dev)
9872

    
9873
      msg = result.fail_msg
9874
      if msg or not result.payload:
9875
        if not msg:
9876
          msg = "disk not found"
9877
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9878
                                 (name, msg))
9879

    
9880
      if result.payload.is_degraded:
9881
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9882

    
9883
  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")
9894

    
9895
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9896
    """Replace a disk on the primary or secondary for DRBD 8.
9897

9898
    The algorithm for replace is quite complicated:
9899

9900
      1. for each disk to be replaced:
9901

9902
        1. create new LVs on the target node with unique names
9903
        1. detach old LVs from the drbd device
9904
        1. rename old LVs to name_replaced.<time_t>
9905
        1. rename new LVs to old LVs
9906
        1. attach the new LVs (with the old names now) to the drbd device
9907

9908
      1. wait for sync across all devices
9909

9910
      1. for each modified disk:
9911

9912
        1. remove old LVs (which have the name name_replaced.<time_t>)
9913

9914
    Failures are not very well handled.
9915

9916
    """
9917
    steps_total = 6
9918

    
9919
    # Step: check device activation
9920
    self.lu.LogStep(1, steps_total, "Check device existence")
9921
    self._CheckDisksExistence([self.other_node, self.target_node])
9922
    self._CheckVolumeGroup([self.target_node, self.other_node])
9923

    
9924
    # Step: check other node consistency
9925
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9926
    self._CheckDisksConsistency(self.other_node,
9927
                                self.other_node == self.instance.primary_node,
9928
                                False)
9929

    
9930
    # Step: create new storage
9931
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9932
    iv_names = self._CreateNewStorage(self.target_node)
9933

    
9934
    # Step: for each lv, detach+rename*2+attach
9935
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9936
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9937
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9938

    
9939
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9940
                                                     old_lvs)
9941
      result.Raise("Can't detach drbd from local storage on node"
9942
                   " %s for device %s" % (self.target_node, dev.iv_name))
9943
      #dev.children = []
9944
      #cfg.Update(instance)
9945

    
9946
      # ok, we created the new LVs, so now we know we have the needed
9947
      # storage; as such, we proceed on the target node to rename
9948
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9949
      # using the assumption that logical_id == physical_id (which in
9950
      # turn is the unique_id on that node)
9951

    
9952
      # FIXME(iustin): use a better name for the replaced LVs
9953
      temp_suffix = int(time.time())
9954
      ren_fn = lambda d, suff: (d.physical_id[0],
9955
                                d.physical_id[1] + "_replaced-%s" % suff)
9956

    
9957
      # Build the rename list based on what LVs exist on the node
9958
      rename_old_to_new = []
9959
      for to_ren in old_lvs:
9960
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9961
        if not result.fail_msg and result.payload:
9962
          # device exists
9963
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9964

    
9965
      self.lu.LogInfo("Renaming the old LVs on the target node")
9966
      result = self.rpc.call_blockdev_rename(self.target_node,
9967
                                             rename_old_to_new)
9968
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9969

    
9970
      # Now we rename the new LVs to the old LVs
9971
      self.lu.LogInfo("Renaming the new LVs on the target node")
9972
      rename_new_to_old = [(new, old.physical_id)
9973
                           for old, new in zip(old_lvs, new_lvs)]
9974
      result = self.rpc.call_blockdev_rename(self.target_node,
9975
                                             rename_new_to_old)
9976
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9977

    
9978
      # Intermediate steps of in memory modifications
9979
      for old, new in zip(old_lvs, new_lvs):
9980
        new.logical_id = old.logical_id
9981
        self.cfg.SetDiskID(new, self.target_node)
9982

    
9983
      # We need to modify old_lvs so that removal later removes the
9984
      # right LVs, not the newly added ones; note that old_lvs is a
9985
      # copy here
9986
      for disk in old_lvs:
9987
        disk.logical_id = ren_fn(disk, temp_suffix)
9988
        self.cfg.SetDiskID(disk, self.target_node)
9989

    
9990
      # Now that the new lvs have the old name, we can add them to the device
9991
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9992
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9993
                                                  new_lvs)
9994
      msg = result.fail_msg
9995
      if msg:
9996
        for new_lv in new_lvs:
9997
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9998
                                               new_lv).fail_msg
9999
          if msg2:
10000
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10001
                               hint=("cleanup manually the unused logical"
10002
                                     "volumes"))
10003
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10004

    
10005
    cstep = 5
10006
    if self.early_release:
10007
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10008
      cstep += 1
10009
      self._RemoveOldStorage(self.target_node, iv_names)
10010
      # WARNING: we release both node locks here, do not do other RPCs
10011
      # than WaitForSync to the primary node
10012
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10013
                    names=[self.target_node, self.other_node])
10014

    
10015
    # Wait for sync
10016
    # This can fail as the old devices are degraded and _WaitForSync
10017
    # does a combined result over all disks, so we don't check its return value
10018
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10019
    cstep += 1
10020
    _WaitForSync(self.lu, self.instance)
10021

    
10022
    # Check all devices manually
10023
    self._CheckDevices(self.instance.primary_node, iv_names)
10024

    
10025
    # Step: remove old storage
10026
    if not self.early_release:
10027
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10028
      cstep += 1
10029
      self._RemoveOldStorage(self.target_node, iv_names)
10030

    
10031
  def _ExecDrbd8Secondary(self, feedback_fn):
10032
    """Replace the secondary node for DRBD 8.
10033

10034
    The algorithm for replace is quite complicated:
10035
      - for all disks of the instance:
10036
        - create new LVs on the new node with same names
10037
        - shutdown the drbd device on the old secondary
10038
        - disconnect the drbd network on the primary
10039
        - create the drbd device on the new secondary
10040
        - network attach the drbd on the primary, using an artifice:
10041
          the drbd code for Attach() will connect to the network if it
10042
          finds a device which is connected to the good local disks but
10043
          not network enabled
10044
      - wait for sync across all devices
10045
      - remove all disks from the old secondary
10046

10047
    Failures are not very well handled.
10048

10049
    """
10050
    steps_total = 6
10051

    
10052
    pnode = self.instance.primary_node
10053

    
10054
    # Step: check device activation
10055
    self.lu.LogStep(1, steps_total, "Check device existence")
10056
    self._CheckDisksExistence([self.instance.primary_node])
10057
    self._CheckVolumeGroup([self.instance.primary_node])
10058

    
10059
    # Step: check other node consistency
10060
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10061
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
10062

    
10063
    # Step: create new storage
10064
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10065
    for idx, dev in enumerate(self.instance.disks):
10066
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10067
                      (self.new_node, idx))
10068
      # we pass force_create=True to force LVM creation
10069
      for new_lv in dev.children:
10070
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10071
                        _GetInstanceInfoText(self.instance), False)
10072

    
10073
    # Step 4: drbd minors and drbd setup changes
10074
    # after this, we must manually remove the drbd minors on both the
10075
    # error and the success paths
10076
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10077
    minors = self.cfg.AllocateDRBDMinor([self.new_node
10078
                                         for dev in self.instance.disks],
10079
                                        self.instance.name)
10080
    logging.debug("Allocated minors %r", minors)
10081

    
10082
    iv_names = {}
10083
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10084
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10085
                      (self.new_node, idx))
10086
      # create new devices on new_node; note that we create two IDs:
10087
      # one without port, so the drbd will be activated without
10088
      # networking information on the new node at this stage, and one
10089
      # with network, for the latter activation in step 4
10090
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10091
      if self.instance.primary_node == o_node1:
10092
        p_minor = o_minor1
10093
      else:
10094
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10095
        p_minor = o_minor2
10096

    
10097
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10098
                      p_minor, new_minor, o_secret)
10099
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10100
                    p_minor, new_minor, o_secret)
10101

    
10102
      iv_names[idx] = (dev, dev.children, new_net_id)
10103
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10104
                    new_net_id)
10105
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10106
                              logical_id=new_alone_id,
10107
                              children=dev.children,
10108
                              size=dev.size)
10109
      try:
10110
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10111
                              _GetInstanceInfoText(self.instance), False)
10112
      except errors.GenericError:
10113
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10114
        raise
10115

    
10116
    # We have new devices, shutdown the drbd on the old secondary
10117
    for idx, dev in enumerate(self.instance.disks):
10118
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10119
      self.cfg.SetDiskID(dev, self.target_node)
10120
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10121
      if msg:
10122
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10123
                           "node: %s" % (idx, msg),
10124
                           hint=("Please cleanup this device manually as"
10125
                                 " soon as possible"))
10126

    
10127
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10128
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10129
                                               self.instance.disks)[pnode]
10130

    
10131
    msg = result.fail_msg
10132
    if msg:
10133
      # detaches didn't succeed (unlikely)
10134
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10135
      raise errors.OpExecError("Can't detach the disks from the network on"
10136
                               " old node: %s" % (msg,))
10137

    
10138
    # if we managed to detach at least one, we update all the disks of
10139
    # the instance to point to the new secondary
10140
    self.lu.LogInfo("Updating instance configuration")
10141
    for dev, _, new_logical_id in iv_names.itervalues():
10142
      dev.logical_id = new_logical_id
10143
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10144

    
10145
    self.cfg.Update(self.instance, feedback_fn)
10146

    
10147
    # and now perform the drbd attach
10148
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10149
                    " (standalone => connected)")
10150
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10151
                                            self.new_node],
10152
                                           self.node_secondary_ip,
10153
                                           self.instance.disks,
10154
                                           self.instance.name,
10155
                                           False)
10156
    for to_node, to_result in result.items():
10157
      msg = to_result.fail_msg
10158
      if msg:
10159
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10160
                           to_node, msg,
10161
                           hint=("please do a gnt-instance info to see the"
10162
                                 " status of disks"))
10163
    cstep = 5
10164
    if self.early_release:
10165
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10166
      cstep += 1
10167
      self._RemoveOldStorage(self.target_node, iv_names)
10168
      # WARNING: we release all node locks here, do not do other RPCs
10169
      # than WaitForSync to the primary node
10170
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10171
                    names=[self.instance.primary_node,
10172
                           self.target_node,
10173
                           self.new_node])
10174

    
10175
    # Wait for sync
10176
    # This can fail as the old devices are degraded and _WaitForSync
10177
    # does a combined result over all disks, so we don't check its return value
10178
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10179
    cstep += 1
10180
    _WaitForSync(self.lu, self.instance)
10181

    
10182
    # Check all devices manually
10183
    self._CheckDevices(self.instance.primary_node, iv_names)
10184

    
10185
    # Step: remove old storage
10186
    if not self.early_release:
10187
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10188
      self._RemoveOldStorage(self.target_node, iv_names)
10189

    
10190

    
10191
class LURepairNodeStorage(NoHooksLU):
10192
  """Repairs the volume group on a node.
10193

10194
  """
10195
  REQ_BGL = False
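
  # Illustrative sketch (hypothetical node and volume group names): repairing
  # an LVM volume group would be requested roughly as
  #   opcodes.OpRepairNodeStorage(node_name="node1.example.com",
  #                               storage_type=constants.ST_LVM_VG,
  #                               name="xenvg", ignore_consistency=False)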
10196

    
10197
  def CheckArguments(self):
10198
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10199

    
10200
    storage_type = self.op.storage_type
10201

    
10202
    if (constants.SO_FIX_CONSISTENCY not in
10203
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10204
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10205
                                 " repaired" % storage_type,
10206
                                 errors.ECODE_INVAL)
10207

    
10208
  def ExpandNames(self):
10209
    self.needed_locks = {
10210
      locking.LEVEL_NODE: [self.op.node_name],
10211
      }
10212

    
10213
  def _CheckFaultyDisks(self, instance, node_name):
10214
    """Ensure faulty disks abort the opcode or at least warn."""
10215
    try:
10216
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10217
                                  node_name, True):
10218
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10219
                                   " node '%s'" % (instance.name, node_name),
10220
                                   errors.ECODE_STATE)
10221
    except errors.OpPrereqError, err:
10222
      if self.op.ignore_consistency:
10223
        self.proc.LogWarning(str(err.args[0]))
10224
      else:
10225
        raise
10226

    
10227
  def CheckPrereq(self):
10228
    """Check prerequisites.
10229

10230
    """
10231
    # Check whether any instance on this node has faulty disks
10232
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10233
      if not inst.admin_up:
10234
        continue
10235
      check_nodes = set(inst.all_nodes)
10236
      check_nodes.discard(self.op.node_name)
10237
      for inst_node_name in check_nodes:
10238
        self._CheckFaultyDisks(inst, inst_node_name)
10239

    
10240
  def Exec(self, feedback_fn):
10241
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10242
                (self.op.name, self.op.node_name))
10243

    
10244
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10245
    result = self.rpc.call_storage_execute(self.op.node_name,
10246
                                           self.op.storage_type, st_args,
10247
                                           self.op.name,
10248
                                           constants.SO_FIX_CONSISTENCY)
10249
    result.Raise("Failed to repair storage unit '%s' on %s" %
10250
                 (self.op.name, self.op.node_name))
10251

    
10252

    
10253
class LUNodeEvacuate(NoHooksLU):
10254
  """Evacuates instances off a list of nodes.
10255

10256
  """
10257
  REQ_BGL = False
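
  # Illustrative sketch (hypothetical node names): evacuating only the DRBD
  # secondaries of a node onto a fixed replacement node would be requested
  # roughly as
  #   opcodes.OpNodeEvacuate(node_name="node2.example.com",
  #                          remote_node="node3.example.com",
  #                          mode=constants.IALLOCATOR_NEVAC_SEC,
  #                          early_release=False)
  # while evacuating primaries (or all instances) requires an iallocator.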
10258

    
10259
  def CheckArguments(self):
10260
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10261

    
10262
  def ExpandNames(self):
10263
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10264

    
10265
    if self.op.remote_node is not None:
10266
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10267
      assert self.op.remote_node
10268

    
10269
      if self.op.remote_node == self.op.node_name:
10270
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10271
                                   " secondary node", errors.ECODE_INVAL)
10272

    
10273
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10274
        raise errors.OpPrereqError("Without the use of an iallocator only"
10275
                                   " secondary instances can be evacuated",
10276
                                   errors.ECODE_INVAL)
10277

    
10278
    # Declare locks
10279
    self.share_locks = _ShareAll()
10280
    self.needed_locks = {
10281
      locking.LEVEL_INSTANCE: [],
10282
      locking.LEVEL_NODEGROUP: [],
10283
      locking.LEVEL_NODE: [],
10284
      }
10285

    
10286
    if self.op.remote_node is None:
10287
      # Iallocator will choose any node(s) in the same group
10288
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10289
    else:
10290
      group_nodes = frozenset([self.op.remote_node])
10291

    
10292
    # Determine nodes to be locked
10293
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10294

    
10295
  def _DetermineInstances(self):
10296
    """Builds list of instances to operate on.
10297

10298
    """
10299
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10300

    
10301
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10302
      # Primary instances only
10303
      inst_fn = _GetNodePrimaryInstances
10304
      assert self.op.remote_node is None, \
10305
        "Evacuating primary instances requires iallocator"
10306
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10307
      # Secondary instances only
10308
      inst_fn = _GetNodeSecondaryInstances
10309
    else:
10310
      # All instances
10311
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10312
      inst_fn = _GetNodeInstances
10313

    
10314
    return inst_fn(self.cfg, self.op.node_name)
10315

    
10316
  def DeclareLocks(self, level):
10317
    if level == locking.LEVEL_INSTANCE:
10318
      # Lock instances optimistically, needs verification once node and group
10319
      # locks have been acquired
10320
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10321
        set(i.name for i in self._DetermineInstances())
10322

    
10323
    elif level == locking.LEVEL_NODEGROUP:
10324
      # Lock node groups optimistically, needs verification once nodes have
10325
      # been acquired
10326
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10327
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10328

    
10329
    elif level == locking.LEVEL_NODE:
10330
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10331

    
10332
  def CheckPrereq(self):
10333
    # Verify locks
10334
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10335
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10336
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10337

    
10338
    assert owned_nodes == self.lock_nodes
10339

    
10340
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10341
    if owned_groups != wanted_groups:
10342
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10343
                               " current groups are '%s', used to be '%s'" %
10344
                               (utils.CommaJoin(wanted_groups),
10345
                                utils.CommaJoin(owned_groups)))
10346

    
10347
    # Determine affected instances
10348
    self.instances = self._DetermineInstances()
10349
    self.instance_names = [i.name for i in self.instances]
10350

    
10351
    if set(self.instance_names) != owned_instances:
10352
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10353
                               " were acquired, current instances are '%s',"
10354
                               " used to be '%s'" %
10355
                               (self.op.node_name,
10356
                                utils.CommaJoin(self.instance_names),
10357
                                utils.CommaJoin(owned_instances)))
10358

    
10359
    if self.instance_names:
10360
      self.LogInfo("Evacuating instances from node '%s': %s",
10361
                   self.op.node_name,
10362
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10363
    else:
10364
      self.LogInfo("No instances to evacuate from node '%s'",
10365
                   self.op.node_name)
10366

    
10367
    if self.op.remote_node is not None:
10368
      for i in self.instances:
10369
        if i.primary_node == self.op.remote_node:
10370
          raise errors.OpPrereqError("Node %s is the primary node of"
10371
                                     " instance %s, cannot use it as"
10372
                                     " secondary" %
10373
                                     (self.op.remote_node, i.name),
10374
                                     errors.ECODE_INVAL)
10375

    
10376
  def Exec(self, feedback_fn):
10377
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10378

    
10379
    if not self.instance_names:
10380
      # No instances to evacuate
10381
      jobs = []
10382

    
10383
    elif self.op.iallocator is not None:
10384
      # TODO: Implement relocation to other group
10385
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10386
                       evac_mode=self.op.mode,
10387
                       instances=list(self.instance_names))
10388

    
10389
      ial.Run(self.op.iallocator)
10390

    
10391
      if not ial.success:
10392
        raise errors.OpPrereqError("Can't compute node evacuation using"
10393
                                   " iallocator '%s': %s" %
10394
                                   (self.op.iallocator, ial.info),
10395
                                   errors.ECODE_NORES)
10396

    
10397
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10398

    
10399
    elif self.op.remote_node is not None:
10400
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10401
      jobs = [
10402
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10403
                                        remote_node=self.op.remote_node,
10404
                                        disks=[],
10405
                                        mode=constants.REPLACE_DISK_CHG,
10406
                                        early_release=self.op.early_release)]
10407
        for instance_name in self.instance_names
10408
        ]
10409

    
10410
    else:
10411
      raise errors.ProgrammerError("No iallocator or remote node")
10412

    
10413
    return ResultWithJobs(jobs)
10414

    
10415

    
10416
def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op
10426

    
10427

    
10428
def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group
10436

    
10437

    
10438
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10439
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10440

10441
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10442
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10443

10444
  @type lu: L{LogicalUnit}
10445
  @param lu: Logical unit instance
10446
  @type alloc_result: tuple/list
10447
  @param alloc_result: Result from iallocator
10448
  @type early_release: bool
10449
  @param early_release: Whether to release locks early if possible
10450
  @type use_nodes: bool
10451
  @param use_nodes: Whether to display node names instead of groups
10452

10453
  """
10454
  (moved, failed, jobs) = alloc_result
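  # "moved" is a list of (instance, group, nodes) tuples, "failed" a list of
  # (instance, reason) tuples and "jobs" a list of job definitions, each being
  # a list of serialized opcodes that is re-instantiated below via
  # opcodes.OpCode.LoadOpCode.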
10455

    
10456
  if failed:
10457
    lu.LogWarning("Unable to evacuate instances %s",
10458
                  utils.CommaJoin("%s (%s)" % (name, reason)
10459
                                  for (name, reason) in failed))
10460

    
10461
  if moved:
10462
    lu.LogInfo("Instances to be moved: %s",
10463
               utils.CommaJoin("%s (to %s)" %
10464
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10465
                               for (name, group, nodes) in moved))
10466

    
10467
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10468
              map(opcodes.OpCode.LoadOpCode, ops))
10469
          for ops in jobs]
10470

    
10471

    
10472
class LUInstanceGrowDisk(LogicalUnit):
10473
  """Grow a disk of an instance.
10474

10475
  """
10476
  HPATH = "disk-grow"
10477
  HTYPE = constants.HTYPE_INSTANCE
10478
  REQ_BGL = False
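
  # Illustrative sketch (hypothetical values): growing disk 0 of an instance
  # by 2 GiB would be requested roughly as
  #   opcodes.OpInstanceGrowDisk(instance_name="inst1.example.com", disk=0,
  #                              amount=2048, wait_for_sync=True)
  # with the amount assumed to be in mebibytes.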
10479

    
10480
  def ExpandNames(self):
10481
    self._ExpandAndLockInstance()
10482
    self.needed_locks[locking.LEVEL_NODE] = []
10483
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10484

    
10485
  def DeclareLocks(self, level):
10486
    if level == locking.LEVEL_NODE:
10487
      self._LockInstancesNodes()
10488

    
10489
  def BuildHooksEnv(self):
10490
    """Build hooks env.
10491

10492
    This runs on the master, the primary and all the secondaries.
10493

10494
    """
10495
    env = {
10496
      "DISK": self.op.disk,
10497
      "AMOUNT": self.op.amount,
10498
      }
10499
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10500
    return env
10501

    
10502
  def BuildHooksNodes(self):
10503
    """Build hooks nodes.
10504

10505
    """
10506
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10507
    return (nl, nl)
10508

    
10509
  def CheckPrereq(self):
10510
    """Check prerequisites.
10511

10512
    This checks that the instance is in the cluster.
10513

10514
    """
10515
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10516
    assert instance is not None, \
10517
      "Cannot retrieve locked instance %s" % self.op.instance_name
10518
    nodenames = list(instance.all_nodes)
10519
    for node in nodenames:
10520
      _CheckNodeOnline(self, node)
10521

    
10522
    self.instance = instance
10523

    
10524
    if instance.disk_template not in constants.DTS_GROWABLE:
10525
      raise errors.OpPrereqError("Instance's disk layout does not support"
10526
                                 " growing", errors.ECODE_INVAL)
10527

    
10528
    self.disk = instance.FindDisk(self.op.disk)
10529

    
10530
    if instance.disk_template not in (constants.DT_FILE,
10531
                                      constants.DT_SHARED_FILE):
10532
      # TODO: check the free disk space for file, when that feature will be
10533
      # supported
10534
      _CheckNodesFreeDiskPerVG(self, nodenames,
10535
                               self.disk.ComputeGrowth(self.op.amount))
10536

    
10537
  def Exec(self, feedback_fn):
10538
    """Execute disk grow.
10539

10540
    """
10541
    instance = self.instance
10542
    disk = self.disk
10543

    
10544
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10545
    if not disks_ok:
10546
      raise errors.OpExecError("Cannot activate block device to grow")
10547

    
10548
    # First run all grow ops in dry-run mode
10549
    for node in instance.all_nodes:
10550
      self.cfg.SetDiskID(disk, node)
10551
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10552
      result.Raise("Grow request failed to node %s" % node)
10553

    
10554
    # We know that (as far as we can test) operations across different
10555
    # nodes will succeed, time to run it for real
10556
    for node in instance.all_nodes:
10557
      self.cfg.SetDiskID(disk, node)
10558
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10559
      result.Raise("Grow request failed to node %s" % node)
10560

    
10561
      # TODO: Rewrite code to work properly
10562
      # DRBD goes into sync mode for a short amount of time after executing the
10563
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10564
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10565
      # time is a work-around.
10566
      time.sleep(5)
10567

    
10568
    disk.RecordGrow(self.op.amount)
10569
    self.cfg.Update(instance, feedback_fn)
10570
    if self.op.wait_for_sync:
10571
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10572
      if disk_abort:
10573
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10574
                             " status; please check the instance")
10575
      if not instance.admin_up:
10576
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10577
    elif not instance.admin_up:
10578
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10579
                           " not supposed to be running because no wait for"
10580
                           " sync mode was requested")
10581

    
10582

    
10583
class LUInstanceQueryData(NoHooksLU):
10584
  """Query runtime instance data.
10585

10586
  """
10587
  REQ_BGL = False
10588

    
10589
  def ExpandNames(self):
10590
    self.needed_locks = {}
10591

    
10592
    # Use locking if requested or when non-static information is wanted
10593
    if not (self.op.static or self.op.use_locking):
10594
      self.LogWarning("Non-static data requested, locks need to be acquired")
10595
      self.op.use_locking = True
10596

    
10597
    if self.op.instances or not self.op.use_locking:
10598
      # Expand instance names right here
10599
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10600
    else:
10601
      # Will use acquired locks
10602
      self.wanted_names = None
10603

    
10604
    if self.op.use_locking:
10605
      self.share_locks = _ShareAll()
10606

    
10607
      if self.wanted_names is None:
10608
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10609
      else:
10610
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10611

    
10612
      self.needed_locks[locking.LEVEL_NODE] = []
10613
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10614

    
10615
  def DeclareLocks(self, level):
10616
    if self.op.use_locking and level == locking.LEVEL_NODE:
10617
      self._LockInstancesNodes()
10618

    
10619
  def CheckPrereq(self):
10620
    """Check prerequisites.
10621

10622
    This only checks the optional instance list against the existing names.
10623

10624
    """
10625
    if self.wanted_names is None:
10626
      assert self.op.use_locking, "Locking was not used"
10627
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10628

    
10629
    self.wanted_instances = \
10630
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10631

    
10632
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10633
    """Returns the status of a block device
10634

10635
    """
10636
    if self.op.static or not node:
10637
      return None
10638

    
10639
    self.cfg.SetDiskID(dev, node)
10640

    
10641
    result = self.rpc.call_blockdev_find(node, dev)
10642
    if result.offline:
10643
      return None
10644

    
10645
    result.Raise("Can't compute disk status for %s" % instance_name)
10646

    
10647
    status = result.payload
10648
    if status is None:
10649
      return None
10650

    
10651
    return (status.dev_path, status.major, status.minor,
10652
            status.sync_percent, status.estimated_time,
10653
            status.is_degraded, status.ldisk_status)
10654

    
10655
  def _ComputeDiskStatus(self, instance, snode, dev):
10656
    """Compute block device status.
10657

10658
    """
10659
    if dev.dev_type in constants.LDS_DRBD:
10660
      # we change the snode then (otherwise we use the one passed in)
10661
      if dev.logical_id[0] == instance.primary_node:
10662
        snode = dev.logical_id[1]
10663
      else:
10664
        snode = dev.logical_id[0]
10665

    
10666
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10667
                                              instance.name, dev)
10668
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10669

    
10670
    if dev.children:
10671
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10672
                                        instance, snode),
10673
                         dev.children)
10674
    else:
10675
      dev_children = []
10676

    
10677
    return {
10678
      "iv_name": dev.iv_name,
10679
      "dev_type": dev.dev_type,
10680
      "logical_id": dev.logical_id,
10681
      "physical_id": dev.physical_id,
10682
      "pstatus": dev_pstatus,
10683
      "sstatus": dev_sstatus,
10684
      "children": dev_children,
10685
      "mode": dev.mode,
10686
      "size": dev.size,
10687
      }
10688

    
10689
  def Exec(self, feedback_fn):
10690
    """Gather and return data"""
10691
    result = {}
10692

    
10693
    cluster = self.cfg.GetClusterInfo()
10694

    
10695
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10696
                                          for i in self.wanted_instances)
10697
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10698
      if self.op.static or pnode.offline:
10699
        remote_state = None
10700
        if pnode.offline:
10701
          self.LogWarning("Primary node %s is marked offline, returning static"
10702
                          " information only for instance %s" %
10703
                          (pnode.name, instance.name))
10704
      else:
10705
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10706
                                                  instance.name,
10707
                                                  instance.hypervisor)
10708
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10709
        remote_info = remote_info.payload
10710
        if remote_info and "state" in remote_info:
10711
          remote_state = "up"
10712
        else:
10713
          remote_state = "down"
10714

    
10715
      if instance.admin_up:
10716
        config_state = "up"
10717
      else:
10718
        config_state = "down"
10719

    
10720
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10721
                  instance.disks)
10722

    
10723
      result[instance.name] = {
10724
        "name": instance.name,
10725
        "config_state": config_state,
10726
        "run_state": remote_state,
10727
        "pnode": instance.primary_node,
10728
        "snodes": instance.secondary_nodes,
10729
        "os": instance.os,
10730
        # this happens to be the same format used for hooks
10731
        "nics": _NICListToTuple(self, instance.nics),
10732
        "disk_template": instance.disk_template,
10733
        "disks": disks,
10734
        "hypervisor": instance.hypervisor,
10735
        "network_port": instance.network_port,
10736
        "hv_instance": instance.hvparams,
10737
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10738
        "be_instance": instance.beparams,
10739
        "be_actual": cluster.FillBE(instance),
10740
        "os_instance": instance.osparams,
10741
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10742
        "serial_no": instance.serial_no,
10743
        "mtime": instance.mtime,
10744
        "ctime": instance.ctime,
10745
        "uuid": instance.uuid,
10746
        }
10747

    
10748
    return result
10749

    
10750

    
10751
class LUInstanceSetParams(LogicalUnit):
10752
  """Modifies an instances's parameters.
10753

10754
  """
10755
  HPATH = "instance-modify"
10756
  HTYPE = constants.HTYPE_INSTANCE
10757
  REQ_BGL = False
10758

    
10759
  def CheckArguments(self):
10760
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10761
            self.op.hvparams or self.op.beparams or self.op.os_name):
10762
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10763

    
10764
    if self.op.hvparams:
10765
      _CheckGlobalHvParams(self.op.hvparams)
10766

    
10767
    # Disk validation
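    # self.op.disks is a list of (operation, parameters) pairs, e.g.
    # (constants.DDM_ADD, {constants.IDISK_SIZE: 1024}) to add a 1024 MiB
    # disk, (constants.DDM_REMOVE, {}) to remove a disk, or (0, {...}) to
    # modify disk 0; only a single add/remove per opcode is accepted
    # (checked below).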
10768
    disk_addremove = 0
10769
    for disk_op, disk_dict in self.op.disks:
10770
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10771
      if disk_op == constants.DDM_REMOVE:
10772
        disk_addremove += 1
10773
        continue
10774
      elif disk_op == constants.DDM_ADD:
10775
        disk_addremove += 1
10776
      else:
10777
        if not isinstance(disk_op, int):
10778
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10779
        if not isinstance(disk_dict, dict):
10780
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10781
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10782

    
10783
      if disk_op == constants.DDM_ADD:
10784
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10785
        if mode not in constants.DISK_ACCESS_SET:
10786
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10787
                                     errors.ECODE_INVAL)
10788
        size = disk_dict.get(constants.IDISK_SIZE, None)
10789
        if size is None:
10790
          raise errors.OpPrereqError("Required disk parameter size missing",
10791
                                     errors.ECODE_INVAL)
10792
        try:
10793
          size = int(size)
10794
        except (TypeError, ValueError), err:
10795
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10796
                                     str(err), errors.ECODE_INVAL)
10797
        disk_dict[constants.IDISK_SIZE] = size
10798
      else:
10799
        # modification of disk
10800
        if constants.IDISK_SIZE in disk_dict:
10801
          raise errors.OpPrereqError("Disk size change not possible, use"
10802
                                     " grow-disk", errors.ECODE_INVAL)
10803

    
10804
    if disk_addremove > 1:
10805
      raise errors.OpPrereqError("Only one disk add or remove operation"
10806
                                 " supported at a time", errors.ECODE_INVAL)
10807

    
10808
    if self.op.disks and self.op.disk_template is not None:
10809
      raise errors.OpPrereqError("Disk template conversion and other disk"
10810
                                 " changes not supported at the same time",
10811
                                 errors.ECODE_INVAL)
10812

    
10813
    if (self.op.disk_template and
10814
        self.op.disk_template in constants.DTS_INT_MIRROR and
10815
        self.op.remote_node is None):
10816
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10817
                                 " one requires specifying a secondary node",
10818
                                 errors.ECODE_INVAL)
10819

    
10820
    # NIC validation
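    # self.op.nics uses the same (operation, parameters) format as the disks
    # list above, e.g. (constants.DDM_ADD, {constants.INIC_IP: "10.0.0.5"})
    # or (0, {constants.INIC_LINK: "br0"}); the IP and link values are
    # placeholders.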
10821
    nic_addremove = 0
10822
    for nic_op, nic_dict in self.op.nics:
10823
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10824
      if nic_op == constants.DDM_REMOVE:
10825
        nic_addremove += 1
10826
        continue
10827
      elif nic_op == constants.DDM_ADD:
10828
        nic_addremove += 1
10829
      else:
10830
        if not isinstance(nic_op, int):
10831
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10832
        if not isinstance(nic_dict, dict):
10833
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10834
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10835

    
10836
      # nic_dict should be a dict
10837
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10838
      if nic_ip is not None:
10839
        if nic_ip.lower() == constants.VALUE_NONE:
10840
          nic_dict[constants.INIC_IP] = None
10841
        else:
10842
          if not netutils.IPAddress.IsValid(nic_ip):
10843
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10844
                                       errors.ECODE_INVAL)
10845

    
10846
      nic_bridge = nic_dict.get("bridge", None)
10847
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10848
      if nic_bridge and nic_link:
10849
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10850
                                   " at the same time", errors.ECODE_INVAL)
10851
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10852
        nic_dict["bridge"] = None
10853
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10854
        nic_dict[constants.INIC_LINK] = None
10855

    
10856
      if nic_op == constants.DDM_ADD:
10857
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10858
        if nic_mac is None:
10859
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10860

    
10861
      if constants.INIC_MAC in nic_dict:
10862
        nic_mac = nic_dict[constants.INIC_MAC]
10863
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10864
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10865

    
10866
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10867
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10868
                                     " modifying an existing nic",
10869
                                     errors.ECODE_INVAL)
10870

    
10871
    if nic_addremove > 1:
10872
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10873
                                 " supported at a time", errors.ECODE_INVAL)
10874

    
10875
  def ExpandNames(self):
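    # Only the instance lock is acquired here; the node locks are filled in
    # later by DeclareLocks, once the instance's nodes are known.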
10876
    self._ExpandAndLockInstance()
10877
    self.needed_locks[locking.LEVEL_NODE] = []
10878
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10879

    
10880
  def DeclareLocks(self, level):
10881
    if level == locking.LEVEL_NODE:
10882
      self._LockInstancesNodes()
10883
      if self.op.disk_template and self.op.remote_node:
10884
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10885
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10886

    
10887
  def BuildHooksEnv(self):
10888
    """Build hooks env.
10889

10890
    This runs on the master, primary and secondaries.
10891

10892
    """
10893
    args = dict()
10894
    if constants.BE_MEMORY in self.be_new:
10895
      args["memory"] = self.be_new[constants.BE_MEMORY]
10896
    if constants.BE_VCPUS in self.be_new:
10897
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10898
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10899
    # information at all.
10900
    if self.op.nics:
10901
      args["nics"] = []
10902
      nic_override = dict(self.op.nics)
10903
      for idx, nic in enumerate(self.instance.nics):
10904
        if idx in nic_override:
10905
          this_nic_override = nic_override[idx]
10906
        else:
10907
          this_nic_override = {}
10908
        if constants.INIC_IP in this_nic_override:
10909
          ip = this_nic_override[constants.INIC_IP]
10910
        else:
10911
          ip = nic.ip
10912
        if constants.INIC_MAC in this_nic_override:
10913
          mac = this_nic_override[constants.INIC_MAC]
10914
        else:
10915
          mac = nic.mac
10916
        if idx in self.nic_pnew:
10917
          nicparams = self.nic_pnew[idx]
10918
        else:
10919
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10920
        mode = nicparams[constants.NIC_MODE]
10921
        link = nicparams[constants.NIC_LINK]
10922
        args["nics"].append((ip, mac, mode, link))
10923
      if constants.DDM_ADD in nic_override:
10924
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10925
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10926
        nicparams = self.nic_pnew[constants.DDM_ADD]
10927
        mode = nicparams[constants.NIC_MODE]
10928
        link = nicparams[constants.NIC_LINK]
10929
        args["nics"].append((ip, mac, mode, link))
10930
      elif constants.DDM_REMOVE in nic_override:
10931
        del args["nics"][-1]
10932

    
10933
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10934
    if self.op.disk_template:
10935
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10936

    
10937
    return env
10938

    
10939
  def BuildHooksNodes(self):
10940
    """Build hooks nodes.
10941

10942
    """
10943
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10944
    return (nl, nl)
10945

    
10946
  def CheckPrereq(self):
10947
    """Check prerequisites.
10948

10949
    This only checks the instance list against the existing names.
10950

10951
    """
10952
    # checking the new params on the primary/secondary nodes
10953

    
10954
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10955
    cluster = self.cluster = self.cfg.GetClusterInfo()
10956
    assert self.instance is not None, \
10957
      "Cannot retrieve locked instance %s" % self.op.instance_name
10958
    pnode = instance.primary_node
10959
    nodelist = list(instance.all_nodes)
10960

    
10961
    # OS change
10962
    if self.op.os_name and not self.op.force:
10963
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10964
                      self.op.force_variant)
10965
      instance_os = self.op.os_name
10966
    else:
10967
      instance_os = instance.os
10968

    
10969
    if self.op.disk_template:
10970
      if instance.disk_template == self.op.disk_template:
10971
        raise errors.OpPrereqError("Instance already has disk template %s" %
10972
                                   instance.disk_template, errors.ECODE_INVAL)
10973

    
10974
      if (instance.disk_template,
10975
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10976
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10977
                                   " %s to %s" % (instance.disk_template,
10978
                                                  self.op.disk_template),
10979
                                   errors.ECODE_INVAL)
10980
      _CheckInstanceDown(self, instance, "cannot change disk template")
10981
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10982
        if self.op.remote_node == pnode:
10983
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10984
                                     " as the primary node of the instance" %
10985
                                     self.op.remote_node, errors.ECODE_STATE)
10986
        _CheckNodeOnline(self, self.op.remote_node)
10987
        _CheckNodeNotDrained(self, self.op.remote_node)
10988
        # FIXME: here we assume that the old instance type is DT_PLAIN
10989
        assert instance.disk_template == constants.DT_PLAIN
10990
        disks = [{constants.IDISK_SIZE: d.size,
10991
                  constants.IDISK_VG: d.logical_id[0]}
10992
                 for d in instance.disks]
10993
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10994
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10995

    
10996
    # hvparams processing
10997
    if self.op.hvparams:
10998
      hv_type = instance.hypervisor
10999
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11000
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11001
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11002

    
11003
      # local check
11004
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11005
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11006
      self.hv_proposed = self.hv_new = hv_new # the new actual values
11007
      self.hv_inst = i_hvdict # the new dict (without defaults)
11008
    else:
11009
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11010
                                              instance.hvparams)
11011
      self.hv_new = self.hv_inst = {}
11012

    
11013
    # beparams processing
11014
    if self.op.beparams:
11015
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11016
                                   use_none=True)
11017
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11018
      be_new = cluster.SimpleFillBE(i_bedict)
11019
      self.be_proposed = self.be_new = be_new # the new actual values
11020
      self.be_inst = i_bedict # the new dict (without defaults)
11021
    else:
11022
      self.be_new = self.be_inst = {}
11023
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11024
    be_old = cluster.FillBE(instance)
11025

    
11026
    # CPU param validation -- checking every time a parameter is
11027
    # changed to cover all cases where either CPU mask or vcpus have
11028
    # changed
11029
    if (constants.BE_VCPUS in self.be_proposed and
11030
        constants.HV_CPU_MASK in self.hv_proposed):
11031
      cpu_list = \
11032
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11033
      # Verify mask is consistent with number of vCPUs. Can skip this
11034
      # test if only 1 entry in the CPU mask, which means same mask
11035
      # is applied to all vCPUs.
11036
      if (len(cpu_list) > 1 and
11037
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11038
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11039
                                   " CPU mask [%s]" %
11040
                                   (self.be_proposed[constants.BE_VCPUS],
11041
                                    self.hv_proposed[constants.HV_CPU_MASK]),
11042
                                   errors.ECODE_INVAL)
11043

    
11044
      # Only perform this test if a new CPU mask is given
11045
      if constants.HV_CPU_MASK in self.hv_new:
11046
        # Calculate the largest CPU number requested
11047
        max_requested_cpu = max(map(max, cpu_list))
11048
        # Check that all of the instance's nodes have enough physical CPUs to
11049
        # satisfy the requested CPU mask
11050
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11051
                                max_requested_cpu + 1, instance.hypervisor)
11052

    
11053
    # osparams processing
11054
    if self.op.osparams:
11055
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11056
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11057
      self.os_inst = i_osdict # the new dict (without defaults)
11058
    else:
11059
      self.os_inst = {}
11060

    
11061
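    # Warnings collected during CheckPrereq; they are reported to the user
    # from Exec, since CheckPrereq has no feedback_fn of its own.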
    self.warn = []
11062

    
11063
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11064
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11065
      mem_check_list = [pnode]
11066
      if be_new[constants.BE_AUTO_BALANCE]:
11067
        # either we changed auto_balance to yes or it was from before
11068
        mem_check_list.extend(instance.secondary_nodes)
11069
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11070
                                                  instance.hypervisor)
11071
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11072
                                         instance.hypervisor)
11073
      pninfo = nodeinfo[pnode]
11074
      msg = pninfo.fail_msg
11075
      if msg:
11076
        # Assume the primary node is unreachable and go ahead
11077
        self.warn.append("Can't get info from primary node %s: %s" %
11078
                         (pnode, msg))
11079
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
11080
        self.warn.append("Node data from primary node %s doesn't contain"
11081
                         " free memory information" % pnode)
11082
      elif instance_info.fail_msg:
11083
        self.warn.append("Can't get instance runtime information: %s" %
11084
                        instance_info.fail_msg)
11085
      else:
11086
        if instance_info.payload:
11087
          current_mem = int(instance_info.payload["memory"])
11088
        else:
11089
          # Assume instance not running
11090
          # (there is a slight race condition here, but it's not very probable,
11091
          # and we have no other way to check)
11092
          current_mem = 0
11093
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11094
                    pninfo.payload["memory_free"])
11095
        if miss_mem > 0:
11096
          raise errors.OpPrereqError("This change will prevent the instance"
11097
                                     " from starting, due to %d MB of memory"
11098
                                     " missing on its primary node" % miss_mem,
11099
                                     errors.ECODE_NORES)
11100

    
11101
      if be_new[constants.BE_AUTO_BALANCE]:
11102
        for node, nres in nodeinfo.items():
11103
          if node not in instance.secondary_nodes:
11104
            continue
11105
          nres.Raise("Can't get info from secondary node %s" % node,
11106
                     prereq=True, ecode=errors.ECODE_STATE)
11107
          if not isinstance(nres.payload.get("memory_free", None), int):
11108
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11109
                                       " memory information" % node,
11110
                                       errors.ECODE_STATE)
11111
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11112
            raise errors.OpPrereqError("This change will prevent the instance"
11113
                                       " from failover to its secondary node"
11114
                                       " %s, due to not enough memory" % node,
11115
                                       errors.ECODE_STATE)
11116

    
11117
    # NIC processing
11118
    self.nic_pnew = {}
11119
    self.nic_pinst = {}
11120
    for nic_op, nic_dict in self.op.nics:
11121
      if nic_op == constants.DDM_REMOVE:
11122
        if not instance.nics:
11123
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11124
                                     errors.ECODE_INVAL)
11125
        continue
11126
      if nic_op != constants.DDM_ADD:
11127
        # an existing nic
11128
        if not instance.nics:
11129
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11130
                                     " no NICs" % nic_op,
11131
                                     errors.ECODE_INVAL)
11132
        if nic_op < 0 or nic_op >= len(instance.nics):
11133
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11134
                                     " are 0 to %d" %
11135
                                     (nic_op, len(instance.nics) - 1),
11136
                                     errors.ECODE_INVAL)
11137
        old_nic_params = instance.nics[nic_op].nicparams
11138
        old_nic_ip = instance.nics[nic_op].ip
11139
      else:
11140
        old_nic_params = {}
11141
        old_nic_ip = None
11142

    
11143
      update_params_dict = dict([(key, nic_dict[key])
11144
                                 for key in constants.NICS_PARAMETERS
11145
                                 if key in nic_dict])
11146

    
11147
      if "bridge" in nic_dict:
11148
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11149

    
11150
      new_nic_params = _GetUpdatedParams(old_nic_params,
11151
                                         update_params_dict)
11152
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11153
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11154
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11155
      self.nic_pinst[nic_op] = new_nic_params
11156
      self.nic_pnew[nic_op] = new_filled_nic_params
11157
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11158

    
11159
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11160
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11161
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11162
        if msg:
11163
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11164
          if self.op.force:
11165
            self.warn.append(msg)
11166
          else:
11167
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11168
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11169
        if constants.INIC_IP in nic_dict:
11170
          nic_ip = nic_dict[constants.INIC_IP]
11171
        else:
11172
          nic_ip = old_nic_ip
11173
        if nic_ip is None:
11174
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11175
                                     " on a routed nic", errors.ECODE_INVAL)
11176
      if constants.INIC_MAC in nic_dict:
11177
        nic_mac = nic_dict[constants.INIC_MAC]
11178
        if nic_mac is None:
11179
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11180
                                     errors.ECODE_INVAL)
11181
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11182
          # otherwise generate the mac
11183
          nic_dict[constants.INIC_MAC] = \
11184
            self.cfg.GenerateMAC(self.proc.GetECId())
11185
        else:
11186
          # or validate/reserve the current one
11187
          try:
11188
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11189
          except errors.ReservationError:
11190
            raise errors.OpPrereqError("MAC address %s already in use"
11191
                                       " in cluster" % nic_mac,
11192
                                       errors.ECODE_NOTUNIQUE)
11193

    
11194
    # DISK processing
11195
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11196
      raise errors.OpPrereqError("Disk operations not supported for"
11197
                                 " diskless instances",
11198
                                 errors.ECODE_INVAL)
11199
    for disk_op, _ in self.op.disks:
11200
      if disk_op == constants.DDM_REMOVE:
11201
        if len(instance.disks) == 1:
11202
          raise errors.OpPrereqError("Cannot remove the last disk of"
11203
                                     " an instance", errors.ECODE_INVAL)
11204
        _CheckInstanceDown(self, instance, "cannot remove disks")
11205

    
11206
      if (disk_op == constants.DDM_ADD and
11207
          len(instance.disks) >= constants.MAX_DISKS):
11208
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11209
                                   " add more" % constants.MAX_DISKS,
11210
                                   errors.ECODE_STATE)
11211
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11212
        # an existing disk
11213
        if disk_op < 0 or disk_op >= len(instance.disks):
11214
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11215
                                     " are 0 to %d" %
11216
                                     (disk_op, len(instance.disks) - 1),
11217
                                     errors.ECODE_INVAL)
11218

    
11219
    return
11220

    
11221
  def _ConvertPlainToDrbd(self, feedback_fn):
11222
    """Converts an instance from plain to drbd.
11223

11224
    """
11225
    feedback_fn("Converting template to drbd")
11226
    instance = self.instance
11227
    pnode = instance.primary_node
11228
    snode = self.op.remote_node
11229

    
11230
    # create a fake disk info for _GenerateDiskTemplate
11231
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11232
                  constants.IDISK_VG: d.logical_id[0]}
11233
                 for d in instance.disks]
11234
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11235
                                      instance.name, pnode, [snode],
11236
                                      disk_info, None, None, 0, feedback_fn)
11237
    info = _GetInstanceInfoText(instance)
11238
    feedback_fn("Creating aditional volumes...")
11239
    # first, create the missing data and meta devices
11240
    for disk in new_disks:
11241
      # unfortunately this is... not too nice
11242
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11243
                            info, True)
11244
      for child in disk.children:
11245
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11246
    # at this stage, all new LVs have been created, we can rename the
11247
    # old ones
11248
    feedback_fn("Renaming original volumes...")
11249
    rename_list = [(o, n.children[0].logical_id)
11250
                   for (o, n) in zip(instance.disks, new_disks)]
11251
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11252
    result.Raise("Failed to rename original LVs")
11253

    
11254
    feedback_fn("Initializing DRBD devices...")
11255
    # all child devices are in place, we can now create the DRBD devices
11256
    for disk in new_disks:
11257
      for node in [pnode, snode]:
11258
        f_create = node == pnode
11259
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11260

    
11261
    # at this point, the instance has been modified
11262
    instance.disk_template = constants.DT_DRBD8
11263
    instance.disks = new_disks
11264
    self.cfg.Update(instance, feedback_fn)
11265

    
11266
    # disks are created, waiting for sync
11267
    disk_abort = not _WaitForSync(self, instance,
11268
                                  oneshot=not self.op.wait_for_sync)
11269
    if disk_abort:
11270
      raise errors.OpExecError("There are some degraded disks for"
11271
                               " this instance, please cleanup manually")
11272

    
11273
  def _ConvertDrbdToPlain(self, feedback_fn):
11274
    """Converts an instance from drbd to plain.
11275

11276
    """
11277
    instance = self.instance
11278
    assert len(instance.secondary_nodes) == 1
11279
    pnode = instance.primary_node
11280
    snode = instance.secondary_nodes[0]
11281
    feedback_fn("Converting template to plain")
11282

    
11283
    old_disks = instance.disks
11284
    new_disks = [d.children[0] for d in old_disks]
11285

    
11286
    # copy over size and mode
11287
    for parent, child in zip(old_disks, new_disks):
11288
      child.size = parent.size
11289
      child.mode = parent.mode
11290

    
11291
    # update instance structure
11292
    instance.disks = new_disks
11293
    instance.disk_template = constants.DT_PLAIN
11294
    self.cfg.Update(instance, feedback_fn)
11295

    
11296
    feedback_fn("Removing volumes on the secondary node...")
11297
    for disk in old_disks:
11298
      self.cfg.SetDiskID(disk, snode)
11299
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11300
      if msg:
11301
        self.LogWarning("Could not remove block device %s on node %s,"
11302
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11303

    
11304
    feedback_fn("Removing unneeded volumes on the primary node...")
11305
    for idx, disk in enumerate(old_disks):
11306
      meta = disk.children[1]
11307
      self.cfg.SetDiskID(meta, pnode)
11308
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11309
      if msg:
11310
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11311
                        " continuing anyway: %s", idx, pnode, msg)
11312

    
11313
  def Exec(self, feedback_fn):
11314
    """Modifies an instance.
11315

11316
    All parameters take effect only at the next restart of the instance.
11317

11318
    """
11319
    # Process here the warnings from CheckPrereq, as we don't have a
11320
    # feedback_fn there.
11321
    for warn in self.warn:
11322
      feedback_fn("WARNING: %s" % warn)
11323

    
11324
    result = []
11325
    instance = self.instance
11326
    # disk changes
11327
    for disk_op, disk_dict in self.op.disks:
11328
      if disk_op == constants.DDM_REMOVE:
11329
        # remove the last disk
11330
        device = instance.disks.pop()
11331
        device_idx = len(instance.disks)
11332
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11333
          self.cfg.SetDiskID(disk, node)
11334
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11335
          if msg:
11336
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11337
                            " continuing anyway", device_idx, node, msg)
11338
        result.append(("disk/%d" % device_idx, "remove"))
11339
      elif disk_op == constants.DDM_ADD:
11340
        # add a new disk
11341
        if instance.disk_template in (constants.DT_FILE,
11342
                                        constants.DT_SHARED_FILE):
11343
          file_driver, file_path = instance.disks[0].logical_id
11344
          file_path = os.path.dirname(file_path)
11345
        else:
11346
          file_driver = file_path = None
11347
        disk_idx_base = len(instance.disks)
11348
        new_disk = _GenerateDiskTemplate(self,
11349
                                         instance.disk_template,
11350
                                         instance.name, instance.primary_node,
11351
                                         instance.secondary_nodes,
11352
                                         [disk_dict],
11353
                                         file_path,
11354
                                         file_driver,
11355
                                         disk_idx_base, feedback_fn)[0]
11356
        instance.disks.append(new_disk)
11357
        info = _GetInstanceInfoText(instance)
11358

    
11359
        logging.info("Creating volume %s for instance %s",
11360
                     new_disk.iv_name, instance.name)
11361
        # Note: this needs to be kept in sync with _CreateDisks
11362
        #HARDCODE
11363
        for node in instance.all_nodes:
11364
          f_create = node == instance.primary_node
11365
          try:
11366
            _CreateBlockDev(self, node, instance, new_disk,
11367
                            f_create, info, f_create)
11368
          except errors.OpExecError, err:
11369
            self.LogWarning("Failed to create volume %s (%s) on"
11370
                            " node %s: %s",
11371
                            new_disk.iv_name, new_disk, node, err)
11372
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11373
                       (new_disk.size, new_disk.mode)))
11374
      else:
11375
        # change a given disk
11376
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11377
        result.append(("disk.mode/%d" % disk_op,
11378
                       disk_dict[constants.IDISK_MODE]))
11379

    
11380
    if self.op.disk_template:
11381
      r_shut = _ShutdownInstanceDisks(self, instance)
11382
      if not r_shut:
11383
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11384
                                 " proceed with disk template conversion")
11385
      mode = (instance.disk_template, self.op.disk_template)
11386
      try:
11387
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11388
      except:
11389
        self.cfg.ReleaseDRBDMinors(instance.name)
11390
        raise
11391
      result.append(("disk_template", self.op.disk_template))
11392

    
11393
    # NIC changes
11394
    for nic_op, nic_dict in self.op.nics:
11395
      if nic_op == constants.DDM_REMOVE:
11396
        # remove the last nic
11397
        del instance.nics[-1]
11398
        result.append(("nic.%d" % len(instance.nics), "remove"))
11399
      elif nic_op == constants.DDM_ADD:
11400
        # mac and bridge should be set by now
11401
        mac = nic_dict[constants.INIC_MAC]
11402
        ip = nic_dict.get(constants.INIC_IP, None)
11403
        nicparams = self.nic_pinst[constants.DDM_ADD]
11404
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11405
        instance.nics.append(new_nic)
11406
        result.append(("nic.%d" % (len(instance.nics) - 1),
11407
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11408
                       (new_nic.mac, new_nic.ip,
11409
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11410
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11411
                       )))
11412
      else:
11413
        for key in (constants.INIC_MAC, constants.INIC_IP):
11414
          if key in nic_dict:
11415
            setattr(instance.nics[nic_op], key, nic_dict[key])
11416
        if nic_op in self.nic_pinst:
11417
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11418
        for key, val in nic_dict.iteritems():
11419
          result.append(("nic.%s/%d" % (key, nic_op), val))
11420

    
11421
    # hvparams changes
11422
    if self.op.hvparams:
11423
      instance.hvparams = self.hv_inst
11424
      for key, val in self.op.hvparams.iteritems():
11425
        result.append(("hv/%s" % key, val))
11426

    
11427
    # beparams changes
11428
    if self.op.beparams:
11429
      instance.beparams = self.be_inst
11430
      for key, val in self.op.beparams.iteritems():
11431
        result.append(("be/%s" % key, val))
11432

    
11433
    # OS change
11434
    if self.op.os_name:
11435
      instance.os = self.op.os_name
11436

    
11437
    # osparams changes
11438
    if self.op.osparams:
11439
      instance.osparams = self.os_inst
11440
      for key, val in self.op.osparams.iteritems():
11441
        result.append(("os/%s" % key, val))
11442

    
11443
    self.cfg.Update(instance, feedback_fn)
11444

    
11445
    return result
11446

    
11447
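  # Dispatch table mapping (current template, requested template) to the
  # conversion helpers above; only plain <-> drbd8 conversions are supported.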
  _DISK_CONVERSIONS = {
11448
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11449
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11450
    }
11451

    
11452

    
11453
class LUInstanceChangeGroup(LogicalUnit):
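  """Change the node group of an instance.

  The actual moves are computed by the iallocator and returned as
  separate jobs (see L{ResultWithJobs}).

  """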
11454
  HPATH = "instance-change-group"
11455
  HTYPE = constants.HTYPE_INSTANCE
11456
  REQ_BGL = False
11457

    
11458
  def ExpandNames(self):
11459
    self.share_locks = _ShareAll()
11460
    self.needed_locks = {
11461
      locking.LEVEL_NODEGROUP: [],
11462
      locking.LEVEL_NODE: [],
11463
      }
11464

    
11465
    self._ExpandAndLockInstance()
11466

    
11467
    if self.op.target_groups:
11468
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11469
                                  self.op.target_groups)
11470
    else:
11471
      self.req_target_uuids = None
11472

    
11473
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11474

    
11475
  def DeclareLocks(self, level):
11476
    if level == locking.LEVEL_NODEGROUP:
11477
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11478

    
11479
      if self.req_target_uuids:
11480
        lock_groups = set(self.req_target_uuids)
11481

    
11482
        # Lock all groups used by instance optimistically; this requires going
11483
        # via the node before it's locked, requiring verification later on
11484
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11485
        lock_groups.update(instance_groups)
11486
      else:
11487
        # No target groups, need to lock all of them
11488
        lock_groups = locking.ALL_SET
11489

    
11490
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11491

    
11492
    elif level == locking.LEVEL_NODE:
11493
      if self.req_target_uuids:
11494
        # Lock all nodes used by instances
11495
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11496
        self._LockInstancesNodes()
11497

    
11498
        # Lock all nodes in all potential target groups
11499
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11500
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11501
        member_nodes = [node_name
11502
                        for group in lock_groups
11503
                        for node_name in self.cfg.GetNodeGroup(group).members]
11504
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11505
      else:
11506
        # Lock all nodes as all groups are potential targets
11507
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11508

    
11509
  def CheckPrereq(self):
11510
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11511
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11512
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11513

    
11514
    assert (self.req_target_uuids is None or
11515
            owned_groups.issuperset(self.req_target_uuids))
11516
    assert owned_instances == set([self.op.instance_name])
11517

    
11518
    # Get instance information
11519
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11520

    
11521
    # Check if node groups for locked instance are still correct
11522
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11523
      ("Instance %s's nodes changed while we kept the lock" %
11524
       self.op.instance_name)
11525

    
11526
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11527
                                           owned_groups)
11528

    
11529
    if self.req_target_uuids:
11530
      # User requested specific target groups
11531
      self.target_uuids = self.req_target_uuids
11532
    else:
11533
      # All groups except those used by the instance are potential targets
11534
      self.target_uuids = owned_groups - inst_groups
11535

    
11536
    conflicting_groups = self.target_uuids & inst_groups
11537
    if conflicting_groups:
11538
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11539
                                 " used by the instance '%s'" %
11540
                                 (utils.CommaJoin(conflicting_groups),
11541
                                  self.op.instance_name),
11542
                                 errors.ECODE_INVAL)
11543

    
11544
    if not self.target_uuids:
11545
      raise errors.OpPrereqError("There are no possible target groups",
11546
                                 errors.ECODE_INVAL)
11547

    
11548
  def BuildHooksEnv(self):
11549
    """Build hooks env.
11550

11551
    """
11552
    assert self.target_uuids
11553

    
11554
    env = {
11555
      "TARGET_GROUPS": " ".join(self.target_uuids),
11556
      }
11557

    
11558
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11559

    
11560
    return env
11561

    
11562
  def BuildHooksNodes(self):
11563
    """Build hooks nodes.
11564

11565
    """
11566
    mn = self.cfg.GetMasterNode()
11567
    return ([mn], [mn])
11568

    
11569
  def Exec(self, feedback_fn):
11570
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11571

    
11572
    assert instances == [self.op.instance_name], "Instance not locked"
11573

    
11574
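    # Ask the iallocator for the jobs needed to move the instance into one
    # of the target groups.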
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11575
                     instances=instances, target_groups=list(self.target_uuids))
11576

    
11577
    ial.Run(self.op.iallocator)
11578

    
11579
    if not ial.success:
11580
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11581
                                 " instance '%s' using iallocator '%s': %s" %
11582
                                 (self.op.instance_name, self.op.iallocator,
11583
                                  ial.info),
11584
                                 errors.ECODE_NORES)
11585

    
11586
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11587

    
11588
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11589
                 " instance '%s'", len(jobs), self.op.instance_name)
11590

    
11591
    return ResultWithJobs(jobs)
11592

    
11593

    
11594
class LUBackupQuery(NoHooksLU):
11595
  """Query the exports list
11596

11597
  """
11598
  REQ_BGL = False
11599

    
11600
  def ExpandNames(self):
11601
    self.needed_locks = {}
11602
    self.share_locks[locking.LEVEL_NODE] = 1
11603
    if not self.op.nodes:
11604
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11605
    else:
11606
      self.needed_locks[locking.LEVEL_NODE] = \
11607
        _GetWantedNodes(self, self.op.nodes)
11608

    
11609
  def Exec(self, feedback_fn):
11610
    """Compute the list of all the exported system images.
11611

11612
    @rtype: dict
11613
    @return: a dictionary with the structure node->(export-list)
11614
        where export-list is a list of the instances exported on
11615
        that node.
11616

11617
    """
11618
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11619
    rpcresult = self.rpc.call_export_list(self.nodes)
11620
    result = {}
11621
    for node in rpcresult:
11622
      if rpcresult[node].fail_msg:
11623
        result[node] = False
11624
      else:
11625
        result[node] = rpcresult[node].payload
11626

    
11627
    return result
11628

    
11629

    
11630
class LUBackupPrepare(NoHooksLU):
11631
  """Prepares an instance for an export and returns useful information.
11632

11633
  """
11634
  REQ_BGL = False
11635

    
11636
  def ExpandNames(self):
11637
    self._ExpandAndLockInstance()
11638

    
11639
  def CheckPrereq(self):
11640
    """Check prerequisites.
11641

11642
    """
11643
    instance_name = self.op.instance_name
11644

    
11645
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11646
    assert self.instance is not None, \
11647
          "Cannot retrieve locked instance %s" % self.op.instance_name
11648
    _CheckNodeOnline(self, self.instance.primary_node)
11649

    
11650
    self._cds = _GetClusterDomainSecret()
11651

    
11652
  def Exec(self, feedback_fn):
11653
    """Prepares an instance for an export.
11654

11655
    """
11656
    instance = self.instance
11657

    
11658
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11659
      salt = utils.GenerateSecret(8)
11660

    
11661
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11662
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11663
                                              constants.RIE_CERT_VALIDITY)
11664
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11665

    
11666
      (name, cert_pem) = result.payload
11667

    
11668
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11669
                                             cert_pem)
11670

    
11671
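      # Both the key name and the CA are authenticated with the cluster
      # domain secret, so the destination cluster can verify their origin.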
      return {
11672
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11673
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11674
                          salt),
11675
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11676
        }
11677

    
11678
    return None
11679

    
11680

    
11681
class LUBackupExport(LogicalUnit):
11682
  """Export an instance to an image in the cluster.
11683

11684
  """
11685
  HPATH = "instance-export"
11686
  HTYPE = constants.HTYPE_INSTANCE
11687
  REQ_BGL = False
11688

    
11689
  def CheckArguments(self):
11690
    """Check the arguments.
11691

11692
    """
11693
    self.x509_key_name = self.op.x509_key_name
11694
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11695

    
11696
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11697
      if not self.x509_key_name:
11698
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11699
                                   errors.ECODE_INVAL)
11700

    
11701
      if not self.dest_x509_ca_pem:
11702
        raise errors.OpPrereqError("Missing destination X509 CA",
11703
                                   errors.ECODE_INVAL)
11704

    
11705
  def ExpandNames(self):
11706
    self._ExpandAndLockInstance()
11707

    
11708
    # Lock all nodes for local exports
11709
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11710
      # FIXME: lock only instance primary and destination node
11711
      #
11712
      # Sad but true, for now we have to lock all nodes, as we don't know where
11713
      # the previous export might be, and in this LU we search for it and
11714
      # remove it from its current node. In the future we could fix this by:
11715
      #  - making a tasklet to search (share-lock all), then create the
11716
      #    new one, then one to remove, after
11717
      #  - removing the removal operation altogether
11718
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11719

    
11720
  def DeclareLocks(self, level):
11721
    """Last minute lock declaration."""
11722
    # All nodes are locked anyway, so nothing to do here.
11723

    
11724
  def BuildHooksEnv(self):
11725
    """Build hooks env.
11726

11727
    This will run on the master, primary node and target node.
11728

11729
    """
11730
    env = {
11731
      "EXPORT_MODE": self.op.mode,
11732
      "EXPORT_NODE": self.op.target_node,
11733
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11734
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11735
      # TODO: Generic function for boolean env variables
11736
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11737
      }
11738

    
11739
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11740

    
11741
    return env
11742

    
11743
  def BuildHooksNodes(self):
11744
    """Build hooks nodes.
11745

11746
    """
11747
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11748

    
11749
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11750
      nl.append(self.op.target_node)
11751

    
11752
    return (nl, nl)
11753

    
11754
  def CheckPrereq(self):
11755
    """Check prerequisites.
11756

11757
    This checks that the instance and node names are valid.
11758

11759
    """
11760
    instance_name = self.op.instance_name
11761

    
11762
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11763
    assert self.instance is not None, \
11764
          "Cannot retrieve locked instance %s" % self.op.instance_name
11765
    _CheckNodeOnline(self, self.instance.primary_node)
11766

    
11767
    if (self.op.remove_instance and self.instance.admin_up and
11768
        not self.op.shutdown):
11769
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11770
                                 " down before")
11771

    
11772
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11773
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11774
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11775
      assert self.dst_node is not None
11776

    
11777
      _CheckNodeOnline(self, self.dst_node.name)
11778
      _CheckNodeNotDrained(self, self.dst_node.name)
11779

    
11780
      self._cds = None
11781
      self.dest_disk_info = None
11782
      self.dest_x509_ca = None
11783

    
11784
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11785
      self.dst_node = None
11786

    
11787
      if len(self.op.target_node) != len(self.instance.disks):
11788
        raise errors.OpPrereqError(("Received destination information for %s"
11789
                                    " disks, but instance %s has %s disks") %
11790
                                   (len(self.op.target_node), instance_name,
11791
                                    len(self.instance.disks)),
11792
                                   errors.ECODE_INVAL)
11793

    
11794
      cds = _GetClusterDomainSecret()
11795

    
11796
      # Check X509 key name
11797
      try:
11798
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11799
      except (TypeError, ValueError), err:
11800
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11801

    
11802
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11803
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11804
                                   errors.ECODE_INVAL)
11805

    
11806
      # Load and verify CA
11807
      try:
11808
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11809
      except OpenSSL.crypto.Error, err:
11810
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11811
                                   (err, ), errors.ECODE_INVAL)
11812

    
11813
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11814
      if errcode is not None:
11815
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11816
                                   (msg, ), errors.ECODE_INVAL)
11817

    
11818
      self.dest_x509_ca = cert
11819

    
11820
      # Verify target information
11821
      disk_info = []
11822
      for idx, disk_data in enumerate(self.op.target_node):
11823
        try:
11824
          (host, port, magic) = \
11825
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11826
        except errors.GenericError, err:
11827
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11828
                                     (idx, err), errors.ECODE_INVAL)
11829

    
11830
        disk_info.append((host, port, magic))
11831

    
11832
      assert len(disk_info) == len(self.op.target_node)
11833
      self.dest_disk_info = disk_info
11834

    
11835
    else:
11836
      raise errors.ProgrammerError("Unhandled export mode %r" %
11837
                                   self.op.mode)
11838

    
11839
    # instance disk type verification
11840
    # TODO: Implement export support for file-based disks
11841
    for disk in self.instance.disks:
11842
      if disk.dev_type == constants.LD_FILE:
11843
        raise errors.OpPrereqError("Export not supported for instances with"
11844
                                   " file-based disks", errors.ECODE_INVAL)
11845

    
11846
  def _CleanupExports(self, feedback_fn):
11847
    """Removes exports of current instance from all other nodes.
11848

11849
    If an instance in a cluster with nodes A..D was exported to node C, its
11850
    exports will be removed from the nodes A, B and D.
11851

11852
    """
11853
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11854

    
11855
    nodelist = self.cfg.GetNodeList()
11856
    nodelist.remove(self.dst_node.name)
11857

    
11858
    # on one-node clusters nodelist will be empty after the removal
11859
    # if we proceed, the backup would be removed because OpBackupQuery
11860
    # substitutes an empty list with the full cluster node list.
11861
    iname = self.instance.name
11862
    if nodelist:
11863
      feedback_fn("Removing old exports for instance %s" % iname)
11864
      exportlist = self.rpc.call_export_list(nodelist)
11865
      for node in exportlist:
11866
        if exportlist[node].fail_msg:
11867
          continue
11868
        if iname in exportlist[node].payload:
11869
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11870
          if msg:
11871
            self.LogWarning("Could not remove older export for instance %s"
11872
                            " on node %s: %s", iname, node, msg)
11873

    
11874
  def Exec(self, feedback_fn):
11875
    """Export an instance to an image in the cluster.
11876

11877
    """
11878
    assert self.op.mode in constants.EXPORT_MODES
11879

    
11880
    instance = self.instance
11881
    src_node = instance.primary_node
11882

    
11883
    if self.op.shutdown:
11884
      # shutdown the instance, but not the disks
11885
      feedback_fn("Shutting down instance %s" % instance.name)
11886
      result = self.rpc.call_instance_shutdown(src_node, instance,
11887
                                               self.op.shutdown_timeout)
11888
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11889
      result.Raise("Could not shutdown instance %s on"
11890
                   " node %s" % (instance.name, src_node))
11891

    
11892
    # set the disks ID correctly since call_instance_start needs the
11893
    # correct drbd minor to create the symlinks
11894
    for disk in instance.disks:
11895
      self.cfg.SetDiskID(disk, src_node)
11896

    
11897
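    # If the instance is marked as down, its disks have to be activated for
    # the export and deactivated again in the outer "finally" block below.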
    activate_disks = (not instance.admin_up)
11898

    
11899
    if activate_disks:
11900
      # Activate the instance disks if we're exporting a stopped instance
11901
      feedback_fn("Activating disks for %s" % instance.name)
11902
      _StartInstanceDisks(self, instance, None)
11903

    
11904
    try:
11905
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11906
                                                     instance)
11907

    
11908
      helper.CreateSnapshots()
11909
      try:
11910
        if (self.op.shutdown and instance.admin_up and
11911
            not self.op.remove_instance):
11912
          assert not activate_disks
11913
          feedback_fn("Starting instance %s" % instance.name)
11914
          result = self.rpc.call_instance_start(src_node,
11915
                                                (instance, None, None), False)
11916
          msg = result.fail_msg
11917
          if msg:
11918
            feedback_fn("Failed to start instance: %s" % msg)
11919
            _ShutdownInstanceDisks(self, instance)
11920
            raise errors.OpExecError("Could not start instance: %s" % msg)
11921

    
11922
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11923
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11924
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11925
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11926
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11927

    
11928
          (key_name, _, _) = self.x509_key_name
11929

    
11930
          dest_ca_pem = \
11931
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11932
                                            self.dest_x509_ca)
11933

    
11934
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11935
                                                     key_name, dest_ca_pem,
11936
                                                     timeouts)
11937
      finally:
11938
        helper.Cleanup()
11939

    
11940
      # Check for backwards compatibility
11941
      assert len(dresults) == len(instance.disks)
11942
      assert compat.all(isinstance(i, bool) for i in dresults), \
11943
             "Not all results are boolean: %r" % dresults
11944

    
11945
    finally:
11946
      if activate_disks:
11947
        feedback_fn("Deactivating disks for %s" % instance.name)
11948
        _ShutdownInstanceDisks(self, instance)
11949

    
11950
    if not (compat.all(dresults) and fin_resu):
11951
      failures = []
11952
      if not fin_resu:
11953
        failures.append("export finalization")
11954
      if not compat.all(dresults):
11955
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11956
                               if not dsk)
11957
        failures.append("disk export: disk(s) %s" % fdsk)
11958

    
11959
      raise errors.OpExecError("Export failed, errors in %s" %
11960
                               utils.CommaJoin(failures))
11961

    
11962
    # At this point the export was successful; we can clean up and finish
11963

    
11964
    # Remove instance if requested
11965
    if self.op.remove_instance:
11966
      feedback_fn("Removing instance %s" % instance.name)
11967
      _RemoveInstance(self, feedback_fn, instance,
11968
                      self.op.ignore_remove_failures)
11969

    
11970
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11971
      self._CleanupExports(feedback_fn)
11972

    
11973
    return fin_resu, dresults
11974

    
11975

    
11976
class LUBackupRemove(NoHooksLU):
11977
  """Remove exports related to the named instance.
11978

11979
  """
11980
  REQ_BGL = False
11981

    
11982
  def ExpandNames(self):
11983
    self.needed_locks = {}
11984
    # We need all nodes to be locked in order for RemoveExport to work, but we
11985
    # don't need to lock the instance itself, as nothing will happen to it (and
11986
    # we can remove exports also for a removed instance)
11987
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11988

    
11989
  def Exec(self, feedback_fn):
11990
    """Remove any export.
11991

11992
    """
11993
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11994
    # If the instance was not found we'll try with the name that was passed in.
11995
    # This will only work if it was an FQDN, though.
11996
    fqdn_warn = False
11997
    if not instance_name:
11998
      fqdn_warn = True
11999
      instance_name = self.op.instance_name
12000

    
12001
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12002
    exportlist = self.rpc.call_export_list(locked_nodes)
12003
    found = False
12004
    for node in exportlist:
12005
      msg = exportlist[node].fail_msg
12006
      if msg:
12007
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12008
        continue
12009
      if instance_name in exportlist[node].payload:
12010
        found = True
12011
        result = self.rpc.call_export_remove(node, instance_name)
12012
        msg = result.fail_msg
12013
        if msg:
12014
          logging.error("Could not remove export for instance %s"
12015
                        " on node %s: %s", instance_name, node, msg)
12016

    
12017
    if fqdn_warn and not found:
12018
      feedback_fn("Export not found. If trying to remove an export belonging"
12019
                  " to a deleted instance please use its Fully Qualified"
12020
                  " Domain Name.")
12021

    
12022

    
12023
class LUGroupAdd(LogicalUnit):
12024
  """Logical unit for creating node groups.
12025

12026
  """
12027
  HPATH = "group-add"
12028
  HTYPE = constants.HTYPE_GROUP
12029
  REQ_BGL = False
12030

    
12031
  def ExpandNames(self):
12032
    # We need the new group's UUID here so that we can create and acquire the
12033
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12034
    # that it should not check whether the UUID exists in the configuration.
12035
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12036
    self.needed_locks = {}
12037
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12038

    
12039
  def CheckPrereq(self):
12040
    """Check prerequisites.
12041

12042
    This checks that the given group name is not an existing node group
12043
    already.
12044

12045
    """
12046
    try:
12047
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12048
    except errors.OpPrereqError:
12049
      pass
12050
    else:
12051
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12052
                                 " node group (UUID: %s)" %
12053
                                 (self.op.group_name, existing_uuid),
12054
                                 errors.ECODE_EXISTS)
12055

    
12056
    if self.op.ndparams:
12057
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12058

    
12059
  def BuildHooksEnv(self):
12060
    """Build hooks env.
12061

12062
    """
12063
    return {
12064
      "GROUP_NAME": self.op.group_name,
12065
      }
12066

    
12067
  def BuildHooksNodes(self):
12068
    """Build hooks nodes.
12069

12070
    """
12071
    mn = self.cfg.GetMasterNode()
12072
    return ([mn], [mn])
12073

    
12074
  def Exec(self, feedback_fn):
12075
    """Add the node group to the cluster.
12076

12077
    """
12078
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12079
                                  uuid=self.group_uuid,
12080
                                  alloc_policy=self.op.alloc_policy,
12081
                                  ndparams=self.op.ndparams)
12082

    
12083
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12084
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12085

    
12086

    
12087
class LUGroupAssignNodes(NoHooksLU):
12088
  """Logical unit for assigning nodes to groups.
12089

12090
  """
12091
  REQ_BGL = False
12092

    
12093
  def ExpandNames(self):
12094
    # These raise errors.OpPrereqError on their own:
12095
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12096
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12097

    
12098
    # We want to lock all the affected nodes and groups. We have readily
12099
    # available the list of nodes, and the *destination* group. To gather the
12100
    # list of "source" groups, we need to fetch node information later on.
12101
    self.needed_locks = {
12102
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12103
      locking.LEVEL_NODE: self.op.nodes,
12104
      }
12105

    
12106
  def DeclareLocks(self, level):
12107
    if level == locking.LEVEL_NODEGROUP:
12108
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12109

    
12110
      # Try to get all affected nodes' groups without having the group or node
12111
      # lock yet. Needs verification later in the code flow.
12112
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12113

    
12114
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12115

    
12116
  def CheckPrereq(self):
12117
    """Check prerequisites.
12118

12119
    """
12120
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12121
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12122
            frozenset(self.op.nodes))
12123

    
12124
    expected_locks = (set([self.group_uuid]) |
12125
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12126
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12127
    if actual_locks != expected_locks:
12128
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12129
                               " current groups are '%s', used to be '%s'" %
12130
                               (utils.CommaJoin(expected_locks),
12131
                                utils.CommaJoin(actual_locks)))
12132

    
12133
    self.node_data = self.cfg.GetAllNodesInfo()
12134
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12135
    instance_data = self.cfg.GetAllInstancesInfo()
12136

    
12137
    if self.group is None:
12138
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12139
                               (self.op.group_name, self.group_uuid))
12140

    
12141
    (new_splits, previous_splits) = \
12142
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12143
                                             for node in self.op.nodes],
12144
                                            self.node_data, instance_data)
12145

    
12146
    if new_splits:
12147
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12148

    
12149
      if not self.op.force:
12150
        raise errors.OpExecError("The following instances get split by this"
12151
                                 " change and --force was not given: %s" %
12152
                                 fmt_new_splits)
12153
      else:
12154
        self.LogWarning("This operation will split the following instances: %s",
12155
                        fmt_new_splits)
12156

    
12157
        if previous_splits:
12158
          self.LogWarning("In addition, these already-split instances continue"
12159
                          " to be split across groups: %s",
12160
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12161

    
12162
  def Exec(self, feedback_fn):
12163
    """Assign nodes to a new group.
12164

12165
    """
12166
    for node in self.op.nodes:
12167
      self.node_data[node].group = self.group_uuid
12168

    
12169
    # FIXME: Depends on side-effects of modifying the result of
12170
    # C{cfg.GetAllNodesInfo}
12171

    
12172
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12173

    
12174
  @staticmethod
12175
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12176
    """Check for split instances after a node assignment.
12177

12178
    This method considers a series of node assignments as an atomic operation,
12179
    and returns information about split instances after applying the set of
12180
    changes.
12181

12182
    In particular, it returns information about newly split instances, and
12183
    instances that were already split, and remain so after the change.
12184

12185
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12186
    considered.
12187

12188
    @type changes: list of (node_name, new_group_uuid) pairs.
12189
    @param changes: list of node assignments to consider.
12190
    @param node_data: a dict with data for all nodes
12191
    @param instance_data: a dict with all instances to consider
12192
    @rtype: a two-tuple
12193
    @return: a list of instances that were previously okay and result split as a
12194
      consequence of this change, and a list of instances that were previously
12195
      split and this change does not fix.
12196

12197
    """
12198
    changed_nodes = dict((node, group) for node, group in changes
12199
                         if node_data[node].group != group)
12200

    
12201
    all_split_instances = set()
12202
    previously_split_instances = set()
12203

    
12204
    def InstanceNodes(instance):
12205
      return [instance.primary_node] + list(instance.secondary_nodes)
12206

    
12207
    for inst in instance_data.values():
12208
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12209
        continue
12210

    
12211
      instance_nodes = InstanceNodes(inst)
12212

    
12213
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12214
        previously_split_instances.add(inst.name)
12215

    
12216
      if len(set(changed_nodes.get(node, node_data[node].group)
12217
                 for node in instance_nodes)) > 1:
12218
        all_split_instances.add(inst.name)
12219

    
12220
    return (list(all_split_instances - previously_split_instances),
12221
            list(previously_split_instances & all_split_instances))
12222

    
12223

    
12224
class _GroupQuery(_QueryBase):
12225
  FIELDS = query.GROUP_FIELDS
12226

    
12227
  def ExpandNames(self, lu):
12228
    lu.needed_locks = {}
12229

    
12230
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12231
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12232

    
12233
    if not self.names:
12234
      self.wanted = [name_to_uuid[name]
12235
                     for name in utils.NiceSort(name_to_uuid.keys())]
12236
    else:
12237
      # Accept names to be either names or UUIDs.
12238
      missing = []
12239
      self.wanted = []
12240
      all_uuid = frozenset(self._all_groups.keys())
12241

    
12242
      for name in self.names:
12243
        if name in all_uuid:
12244
          self.wanted.append(name)
12245
        elif name in name_to_uuid:
12246
          self.wanted.append(name_to_uuid[name])
12247
        else:
12248
          missing.append(name)
12249

    
12250
      if missing:
12251
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12252
                                   utils.CommaJoin(missing),
12253
                                   errors.ECODE_NOENT)
12254

    
12255
  def DeclareLocks(self, lu, level):
12256
    pass
12257

    
12258
  def _GetQueryData(self, lu):
12259
    """Computes the list of node groups and their attributes.
12260

12261
    """
12262
    do_nodes = query.GQ_NODE in self.requested_data
12263
    do_instances = query.GQ_INST in self.requested_data
12264

    
12265
    group_to_nodes = None
12266
    group_to_instances = None
12267

    
12268
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12269
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12270
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12271
    # instance->node. Hence, we will need to process nodes even if we only need
12272
    # instance information.
12273
    if do_nodes or do_instances:
12274
      all_nodes = lu.cfg.GetAllNodesInfo()
12275
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12276
      node_to_group = {}
12277

    
12278
      for node in all_nodes.values():
12279
        if node.group in group_to_nodes:
12280
          group_to_nodes[node.group].append(node.name)
12281
          node_to_group[node.name] = node.group
12282

    
12283
      if do_instances:
12284
        all_instances = lu.cfg.GetAllInstancesInfo()
12285
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12286

    
12287
        for instance in all_instances.values():
12288
          node = instance.primary_node
12289
          if node in node_to_group:
12290
            group_to_instances[node_to_group[node]].append(instance.name)
12291

    
12292
        if not do_nodes:
12293
          # Do not pass on node information if it was not requested.
12294
          group_to_nodes = None
12295

    
12296
    return query.GroupQueryData([self._all_groups[uuid]
12297
                                 for uuid in self.wanted],
12298
                                group_to_nodes, group_to_instances)
12299

    
12300

    
12301
class LUGroupQuery(NoHooksLU):
12302
  """Logical unit for querying node groups.
12303

12304
  """
12305
  REQ_BGL = False
12306

    
12307
  def CheckArguments(self):
12308
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12309
                          self.op.output_fields, False)
12310

    
12311
  def ExpandNames(self):
12312
    self.gq.ExpandNames(self)
12313

    
12314
  def DeclareLocks(self, level):
12315
    self.gq.DeclareLocks(self, level)
12316

    
12317
  def Exec(self, feedback_fn):
12318
    return self.gq.OldStyleQuery(self)
12319

    
12320

    
12321
class LUGroupSetParams(LogicalUnit):
12322
  """Modifies the parameters of a node group.
12323

12324
  """
12325
  HPATH = "group-modify"
12326
  HTYPE = constants.HTYPE_GROUP
12327
  REQ_BGL = False
12328

    
12329
  def CheckArguments(self):
12330
    all_changes = [
12331
      self.op.ndparams,
12332
      self.op.alloc_policy,
12333
      ]
12334

    
12335
    if all_changes.count(None) == len(all_changes):
12336
      raise errors.OpPrereqError("Please pass at least one modification",
12337
                                 errors.ECODE_INVAL)
12338

    
12339
  def ExpandNames(self):
12340
    # This raises errors.OpPrereqError on its own:
12341
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12342

    
12343
    self.needed_locks = {
12344
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12345
      }
12346

    
12347
  def CheckPrereq(self):
12348
    """Check prerequisites.
12349

12350
    """
12351
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12352

    
12353
    if self.group is None:
12354
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12355
                               (self.op.group_name, self.group_uuid))
12356

    
12357
    if self.op.ndparams:
12358
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12359
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12360
      self.new_ndparams = new_ndparams
12361

    
12362
  def BuildHooksEnv(self):
12363
    """Build hooks env.
12364

12365
    """
12366
    return {
12367
      "GROUP_NAME": self.op.group_name,
12368
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12369
      }
12370

    
12371
  def BuildHooksNodes(self):
12372
    """Build hooks nodes.
12373

12374
    """
12375
    mn = self.cfg.GetMasterNode()
12376
    return ([mn], [mn])
12377

    
12378
  def Exec(self, feedback_fn):
12379
    """Modifies the node group.
12380

12381
    """
12382
    result = []
12383

    
12384
    if self.op.ndparams:
12385
      self.group.ndparams = self.new_ndparams
12386
      result.append(("ndparams", str(self.group.ndparams)))
12387

    
12388
    if self.op.alloc_policy:
12389
      self.group.alloc_policy = self.op.alloc_policy
12390

    
12391
    self.cfg.Update(self.group, feedback_fn)
12392
    return result
12393

    
12394

    
12395
class LUGroupRemove(LogicalUnit):
12396
  HPATH = "group-remove"
12397
  HTYPE = constants.HTYPE_GROUP
12398
  REQ_BGL = False
12399

    
12400
  def ExpandNames(self):
12401
    # This will raises errors.OpPrereqError on its own:
12402
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12403
    self.needed_locks = {
12404
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12405
      }
12406

    
12407
  def CheckPrereq(self):
12408
    """Check prerequisites.
12409

12410
    This checks that the given group name exists as a node group, that is
12411
    empty (i.e., contains no nodes), and that is not the last group of the
12412
    cluster.
12413

12414
    """
12415
    # Verify that the group is empty.
12416
    group_nodes = [node.name
12417
                   for node in self.cfg.GetAllNodesInfo().values()
12418
                   if node.group == self.group_uuid]
12419

    
12420
    if group_nodes:
12421
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12422
                                 " nodes: %s" %
12423
                                 (self.op.group_name,
12424
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12425
                                 errors.ECODE_STATE)
12426

    
12427
    # Verify the cluster would not be left group-less.
12428
    if len(self.cfg.GetNodeGroupList()) == 1:
12429
      raise errors.OpPrereqError("Group '%s' is the only group,"
12430
                                 " cannot be removed" %
12431
                                 self.op.group_name,
12432
                                 errors.ECODE_STATE)
12433

    
12434
  def BuildHooksEnv(self):
12435
    """Build hooks env.
12436

12437
    """
12438
    return {
12439
      "GROUP_NAME": self.op.group_name,
12440
      }
12441

    
12442
  def BuildHooksNodes(self):
12443
    """Build hooks nodes.
12444

12445
    """
12446
    mn = self.cfg.GetMasterNode()
12447
    return ([mn], [mn])
12448

    
12449
  def Exec(self, feedback_fn):
12450
    """Remove the node group.
12451

12452
    """
12453
    try:
12454
      self.cfg.RemoveNodeGroup(self.group_uuid)
12455
    except errors.ConfigurationError:
12456
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12457
                               (self.op.group_name, self.group_uuid))
12458

    
12459
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12460

    
12461

    
12462
class LUGroupRename(LogicalUnit):
12463
  HPATH = "group-rename"
12464
  HTYPE = constants.HTYPE_GROUP
12465
  REQ_BGL = False
12466

    
12467
  def ExpandNames(self):
12468
    # This raises errors.OpPrereqError on its own:
12469
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12470

    
12471
    self.needed_locks = {
12472
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12473
      }
12474

    
12475
  def CheckPrereq(self):
12476
    """Check prerequisites.
12477

12478
    Ensures requested new name is not yet used.
12479

12480
    """
12481
    try:
12482
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12483
    except errors.OpPrereqError:
12484
      pass
12485
    else:
12486
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12487
                                 " node group (UUID: %s)" %
12488
                                 (self.op.new_name, new_name_uuid),
12489
                                 errors.ECODE_EXISTS)
12490

    
12491
  def BuildHooksEnv(self):
12492
    """Build hooks env.
12493

12494
    """
12495
    return {
12496
      "OLD_NAME": self.op.group_name,
12497
      "NEW_NAME": self.op.new_name,
12498
      }
12499

    
12500
  def BuildHooksNodes(self):
12501
    """Build hooks nodes.
12502

12503
    """
12504
    mn = self.cfg.GetMasterNode()
12505

    
12506
    all_nodes = self.cfg.GetAllNodesInfo()
12507
    all_nodes.pop(mn, None)
12508

    
12509
    run_nodes = [mn]
12510
    run_nodes.extend(node.name for node in all_nodes.values()
12511
                     if node.group == self.group_uuid)
12512

    
12513
    return (run_nodes, run_nodes)
12514

    
12515
  def Exec(self, feedback_fn):
12516
    """Rename the node group.
12517

12518
    """
12519
    group = self.cfg.GetNodeGroup(self.group_uuid)
12520

    
12521
    if group is None:
12522
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12523
                               (self.op.group_name, self.group_uuid))
12524

    
12525
    group.name = self.op.new_name
12526
    self.cfg.Update(group, feedback_fn)
12527

    
12528
    return self.op.new_name
12529

    
12530

    
12531
class LUGroupEvacuate(LogicalUnit):
12532
  HPATH = "group-evacuate"
12533
  HTYPE = constants.HTYPE_GROUP
12534
  REQ_BGL = False
12535

    
12536
  def ExpandNames(self):
12537
    # This raises errors.OpPrereqError on its own:
12538
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12539

    
12540
    if self.op.target_groups:
12541
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12542
                                  self.op.target_groups)
12543
    else:
12544
      self.req_target_uuids = []
12545

    
12546
    if self.group_uuid in self.req_target_uuids:
12547
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12548
                                 " as a target group (targets are %s)" %
12549
                                 (self.group_uuid,
12550
                                  utils.CommaJoin(self.req_target_uuids)),
12551
                                 errors.ECODE_INVAL)
12552

    
12553
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12554

    
12555
    self.share_locks = _ShareAll()
12556
    self.needed_locks = {
12557
      locking.LEVEL_INSTANCE: [],
12558
      locking.LEVEL_NODEGROUP: [],
12559
      locking.LEVEL_NODE: [],
12560
      }
12561

    
12562
  def DeclareLocks(self, level):
12563
    if level == locking.LEVEL_INSTANCE:
12564
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12565

    
12566
      # Lock instances optimistically, needs verification once node and group
12567
      # locks have been acquired
12568
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12569
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12570

    
12571
    elif level == locking.LEVEL_NODEGROUP:
12572
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12573

    
12574
      if self.req_target_uuids:
12575
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12576

    
12577
        # Lock all groups used by instances optimistically; this requires going
12578
        # via the node before it's locked, requiring verification later on
12579
        lock_groups.update(group_uuid
12580
                           for instance_name in
12581
                             self.owned_locks(locking.LEVEL_INSTANCE)
12582
                           for group_uuid in
12583
                             self.cfg.GetInstanceNodeGroups(instance_name))
12584
      else:
12585
        # No target groups, need to lock all of them
12586
        lock_groups = locking.ALL_SET
12587

    
12588
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12589

    
12590
    elif level == locking.LEVEL_NODE:
12591
      # This will only lock the nodes in the group to be evacuated which
12592
      # contain actual instances
12593
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12594
      self._LockInstancesNodes()
12595

    
12596
      # Lock all nodes in group to be evacuated and target groups
12597
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12598
      assert self.group_uuid in owned_groups
12599
      member_nodes = [node_name
12600
                      for group in owned_groups
12601
                      for node_name in self.cfg.GetNodeGroup(group).members]
12602
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12603

    
12604
  def CheckPrereq(self):
12605
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12606
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12607
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12608

    
12609
    assert owned_groups.issuperset(self.req_target_uuids)
12610
    assert self.group_uuid in owned_groups
12611

    
12612
    # Check if locked instances are still correct
12613
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12614

    
12615
    # Get instance information
12616
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12617

    
12618
    # Check if node groups for locked instances are still correct
12619
    for instance_name in owned_instances:
12620
      inst = self.instances[instance_name]
12621
      assert owned_nodes.issuperset(inst.all_nodes), \
12622
        "Instance %s's nodes changed while we kept the lock" % instance_name
12623

    
12624
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12625
                                             owned_groups)
12626

    
12627
      assert self.group_uuid in inst_groups, \
12628
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12629

    
12630
    if self.req_target_uuids:
12631
      # User requested specific target groups
12632
      self.target_uuids = self.req_target_uuids
12633
    else:
12634
      # All groups except the one to be evacuated are potential targets
12635
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12636
                           if group_uuid != self.group_uuid]
12637

    
12638
      if not self.target_uuids:
12639
        raise errors.OpPrereqError("There are no possible target groups",
12640
                                   errors.ECODE_INVAL)
12641

    
12642
  def BuildHooksEnv(self):
12643
    """Build hooks env.
12644

12645
    """
12646
    return {
12647
      "GROUP_NAME": self.op.group_name,
12648
      "TARGET_GROUPS": " ".join(self.target_uuids),
12649
      }
12650

    
12651
  def BuildHooksNodes(self):
12652
    """Build hooks nodes.
12653

12654
    """
12655
    mn = self.cfg.GetMasterNode()
12656

    
12657
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12658

    
12659
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12660

    
12661
    return (run_nodes, run_nodes)
12662

    
12663
  def Exec(self, feedback_fn):
12664
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12665

    
12666
    assert self.group_uuid not in self.target_uuids
12667

    
12668
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12669
                     instances=instances, target_groups=self.target_uuids)
12670

    
12671
    ial.Run(self.op.iallocator)
12672

    
12673
    if not ial.success:
12674
      raise errors.OpPrereqError("Can't compute group evacuation using"
12675
                                 " iallocator '%s': %s" %
12676
                                 (self.op.iallocator, ial.info),
12677
                                 errors.ECODE_NORES)
12678

    
12679
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12680

    
12681
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12682
                 len(jobs), self.op.group_name)
12683

    
12684
    return ResultWithJobs(jobs)
12685

    
12686

    
12687
class TagsLU(NoHooksLU): # pylint: disable=W0223
12688
  """Generic tags LU.
12689

12690
  This is an abstract class which is the parent of all the other tags LUs.
12691

12692
  """
12693
  def ExpandNames(self):
12694
    self.group_uuid = None
12695
    self.needed_locks = {}
12696
    if self.op.kind == constants.TAG_NODE:
12697
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12698
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12699
    elif self.op.kind == constants.TAG_INSTANCE:
12700
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12701
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12702
    elif self.op.kind == constants.TAG_NODEGROUP:
12703
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12704

    
12705
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12706
    # not possible to acquire the BGL based on opcode parameters)
12707

    
12708
  def CheckPrereq(self):
12709
    """Check prerequisites.
12710

12711
    """
12712
    if self.op.kind == constants.TAG_CLUSTER:
12713
      self.target = self.cfg.GetClusterInfo()
12714
    elif self.op.kind == constants.TAG_NODE:
12715
      self.target = self.cfg.GetNodeInfo(self.op.name)
12716
    elif self.op.kind == constants.TAG_INSTANCE:
12717
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12718
    elif self.op.kind == constants.TAG_NODEGROUP:
12719
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12720
    else:
12721
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12722
                                 str(self.op.kind), errors.ECODE_INVAL)
12723

    
12724

    
12725
class LUTagsGet(TagsLU):
12726
  """Returns the tags of a given object.
12727

12728
  """
12729
  REQ_BGL = False
12730

    
12731
  def ExpandNames(self):
12732
    TagsLU.ExpandNames(self)
12733

    
12734
    # Share locks as this is only a read operation
12735
    self.share_locks = _ShareAll()
12736

    
12737
  def Exec(self, feedback_fn):
12738
    """Returns the tag list.
12739

12740
    """
12741
    return list(self.target.GetTags())
12742

    
12743

    
12744
class LUTagsSearch(NoHooksLU):
12745
  """Searches the tags for a given pattern.
12746

12747
  """
12748
  REQ_BGL = False
12749

    
12750
  def ExpandNames(self):
12751
    self.needed_locks = {}
12752

    
12753
  def CheckPrereq(self):
12754
    """Check prerequisites.
12755

12756
    This checks the pattern passed for validity by compiling it.
12757

12758
    """
12759
    try:
12760
      self.re = re.compile(self.op.pattern)
12761
    except re.error, err:
12762
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12763
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12764

    
12765
  def Exec(self, feedback_fn):
12766
    """Returns the tag list.
12767

12768
    """
12769
    cfg = self.cfg
12770
    tgts = [("/cluster", cfg.GetClusterInfo())]
12771
    ilist = cfg.GetAllInstancesInfo().values()
12772
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12773
    nlist = cfg.GetAllNodesInfo().values()
12774
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12775
    tgts.extend(("/nodegroup/%s" % n.name, n)
12776
                for n in cfg.GetAllNodeGroupsInfo().values())
12777
    results = []
12778
    for path, target in tgts:
12779
      for tag in target.GetTags():
12780
        if self.re.search(tag):
12781
          results.append((path, tag))
12782
    return results
12783

    
12784

    
12785
class LUTagsSet(TagsLU):
12786
  """Sets a tag on a given object.
12787

12788
  """
12789
  REQ_BGL = False
12790

    
12791
  def CheckPrereq(self):
12792
    """Check prerequisites.
12793

12794
    This checks the type and length of the tag name and value.
12795

12796
    """
12797
    TagsLU.CheckPrereq(self)
12798
    for tag in self.op.tags:
12799
      objects.TaggableObject.ValidateTag(tag)
12800

    
12801
  def Exec(self, feedback_fn):
12802
    """Sets the tag.
12803

12804
    """
12805
    try:
12806
      for tag in self.op.tags:
12807
        self.target.AddTag(tag)
12808
    except errors.TagError, err:
12809
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
12810
    self.cfg.Update(self.target, feedback_fn)
12811

    
12812

    
12813
class LUTagsDel(TagsLU):
12814
  """Delete a list of tags from a given object.
12815

12816
  """
12817
  REQ_BGL = False
12818

    
12819
  def CheckPrereq(self):
12820
    """Check prerequisites.
12821

12822
    This checks that we have the given tag.
12823

12824
    """
12825
    TagsLU.CheckPrereq(self)
12826
    for tag in self.op.tags:
12827
      objects.TaggableObject.ValidateTag(tag)
12828
    del_tags = frozenset(self.op.tags)
12829
    cur_tags = self.target.GetTags()
12830

    
12831
    diff_tags = del_tags - cur_tags
12832
    if diff_tags:
12833
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
12834
      raise errors.OpPrereqError("Tag(s) %s not found" %
12835
                                 (utils.CommaJoin(diff_names), ),
12836
                                 errors.ECODE_NOENT)
12837

    
12838
  def Exec(self, feedback_fn):
12839
    """Remove the tag from the object.
12840

12841
    """
12842
    for tag in self.op.tags:
12843
      self.target.RemoveTag(tag)
12844
    self.cfg.Update(self.target, feedback_fn)
12845

    
12846

    
12847
class LUTestDelay(NoHooksLU):
12848
  """Sleep for a specified amount of time.
12849

12850
  This LU sleeps on the master and/or nodes for a specified amount of
12851
  time.
12852

12853
  """
12854
  REQ_BGL = False
12855

    
12856
  def ExpandNames(self):
12857
    """Expand names and set required locks.
12858

12859
    This expands the node list, if any.
12860

12861
    """
12862
    self.needed_locks = {}
12863
    if self.op.on_nodes:
12864
      # _GetWantedNodes can be used here, but is not always appropriate to use
12865
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12866
      # more information.
12867
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12868
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12869

    
12870
  def _TestDelay(self):
12871
    """Do the actual sleep.
12872

12873
    """
12874
    if self.op.on_master:
12875
      if not utils.TestDelay(self.op.duration):
12876
        raise errors.OpExecError("Error during master delay test")
12877
    if self.op.on_nodes:
12878
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12879
      for node, node_result in result.items():
12880
        node_result.Raise("Failure during rpc call to node %s" % node)
12881

    
12882
  def Exec(self, feedback_fn):
12883
    """Execute the test delay opcode, with the wanted repetitions.
12884

12885
    """
12886
    if self.op.repeat == 0:
12887
      self._TestDelay()
12888
    else:
12889
      top_value = self.op.repeat - 1
12890
      for i in range(self.op.repeat):
12891
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12892
        self._TestDelay()
12893

    
12894

    
12895
class LUTestJqueue(NoHooksLU):
12896
  """Utility LU to test some aspects of the job queue.
12897

12898
  """
12899
  REQ_BGL = False
12900

    
12901
  # Must be lower than default timeout for WaitForJobChange to see whether it
12902
  # notices changed jobs
12903
  _CLIENT_CONNECT_TIMEOUT = 20.0
12904
  _CLIENT_CONFIRM_TIMEOUT = 60.0
12905

    
12906
  @classmethod
12907
  def _NotifyUsingSocket(cls, cb, errcls):
12908
    """Opens a Unix socket and waits for another program to connect.
12909

12910
    @type cb: callable
12911
    @param cb: Callback to send socket name to client
12912
    @type errcls: class
12913
    @param errcls: Exception class to use for errors
12914

12915
    """
12916
    # Using a temporary directory as there's no easy way to create temporary
12917
    # sockets without writing a custom loop around tempfile.mktemp and
12918
    # socket.bind
12919
    tmpdir = tempfile.mkdtemp()
12920
    try:
12921
      tmpsock = utils.PathJoin(tmpdir, "sock")
12922

    
12923
      logging.debug("Creating temporary socket at %s", tmpsock)
12924
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12925
      try:
12926
        sock.bind(tmpsock)
12927
        sock.listen(1)
12928

    
12929
        # Send details to client
12930
        cb(tmpsock)
12931

    
12932
        # Wait for client to connect before continuing
12933
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12934
        try:
12935
          (conn, _) = sock.accept()
12936
        except socket.error, err:
12937
          raise errcls("Client didn't connect in time (%s)" % err)
12938
      finally:
12939
        sock.close()
12940
    finally:
12941
      # Remove as soon as client is connected
12942
      shutil.rmtree(tmpdir)
12943

    
12944
    # Wait for client to close
12945
    try:
12946
      try:
12947
        # pylint: disable=E1101
12948
        # Instance of '_socketobject' has no ... member
12949
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12950
        conn.recv(1)
12951
      except socket.error, err:
12952
        raise errcls("Client failed to confirm notification (%s)" % err)
12953
    finally:
12954
      conn.close()
12955

    
12956
  def _SendNotification(self, test, arg, sockname):
12957
    """Sends a notification to the client.
12958

12959
    @type test: string
12960
    @param test: Test name
12961
    @param arg: Test argument (depends on test)
12962
    @type sockname: string
12963
    @param sockname: Socket path
12964

12965
    """
12966
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12967

    
12968
  def _Notify(self, prereq, test, arg):
12969
    """Notifies the client of a test.
12970

12971
    @type prereq: bool
12972
    @param prereq: Whether this is a prereq-phase test
12973
    @type test: string
12974
    @param test: Test name
12975
    @param arg: Test argument (depends on test)
12976

12977
    """
12978
    if prereq:
12979
      errcls = errors.OpPrereqError
12980
    else:
12981
      errcls = errors.OpExecError
12982

    
12983
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12984
                                                  test, arg),
12985
                                   errcls)
12986

    
12987
  def CheckArguments(self):
12988
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12989
    self.expandnames_calls = 0
12990

    
12991
  def ExpandNames(self):
12992
    checkargs_calls = getattr(self, "checkargs_calls", 0)
12993
    if checkargs_calls < 1:
12994
      raise errors.ProgrammerError("CheckArguments was not called")
12995

    
12996
    self.expandnames_calls += 1
12997

    
12998
    if self.op.notify_waitlock:
12999
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
13000

    
13001
    self.LogInfo("Expanding names")
13002

    
13003
    # Get lock on master node (just to get a lock, not for a particular reason)
13004
    self.needed_locks = {
13005
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13006
      }
13007

    
13008
  def Exec(self, feedback_fn):
13009
    if self.expandnames_calls < 1:
13010
      raise errors.ProgrammerError("ExpandNames was not called")
13011

    
13012
    if self.op.notify_exec:
13013
      self._Notify(False, constants.JQT_EXEC, None)
13014

    
13015
    self.LogInfo("Executing")
13016

    
13017
    if self.op.log_messages:
13018
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13019
      for idx, msg in enumerate(self.op.log_messages):
13020
        self.LogInfo("Sending log message %s", idx + 1)
13021
        feedback_fn(constants.JQT_MSGPREFIX + msg)
13022
        # Report how many test messages have been sent
13023
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13024

    
13025
    if self.op.fail:
13026
      raise errors.OpExecError("Opcode failure was requested")
13027

    
13028
    return True
13029

    
13030

    
13031
class IAllocator(object):
13032
  """IAllocator framework.
13033

13034
  An IAllocator instance has three sets of attributes:
13035
    - cfg that is needed to query the cluster
13036
    - input data (all members of the _KEYS class attribute are required)
13037
    - four buffer attributes (in|out_data|text), that represent the
13038
      input (to the external script) in text and data structure format,
13039
      and the output from it, again in two formats
13040
    - the result variables from the script (success, info, nodes) for
13041
      easy usage
13042

13043
  """
13044
  # pylint: disable=R0902
13045
  # lots of instance attributes
13046

    
13047
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
13048
    self.cfg = cfg
13049
    self.rpc = rpc_runner
13050
    # init buffer variables
13051
    self.in_text = self.out_text = self.in_data = self.out_data = None
13052
    # init all input fields so that pylint is happy
13053
    self.mode = mode
13054
    self.memory = self.disks = self.disk_template = None
13055
    self.os = self.tags = self.nics = self.vcpus = None
13056
    self.hypervisor = None
13057
    self.relocate_from = None
13058
    self.name = None
13059
    self.instances = None
13060
    self.evac_mode = None
13061
    self.target_groups = []
13062
    # computed fields
13063
    self.required_nodes = None
13064
    # init result fields
13065
    self.success = self.info = self.result = None
13066

    
13067
    try:
13068
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13069
    except KeyError:
13070
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13071
                                   " IAllocator" % self.mode)
13072

    
13073
    keyset = [n for (n, _) in keydata]
13074

    
13075
    for key in kwargs:
13076
      if key not in keyset:
13077
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
13078
                                     " IAllocator" % key)
13079
      setattr(self, key, kwargs[key])
13080

    
13081
    for key in keyset:
13082
      if key not in kwargs:
13083
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13084
                                     " IAllocator" % key)
13085
    self._BuildInputData(compat.partial(fn, self), keydata)
13086

    
13087
  def _ComputeClusterData(self):
13088
    """Compute the generic allocator input data.
13089

13090
    This is the data that is independent of the actual operation.
13091

13092
    """
13093
    cfg = self.cfg
13094
    cluster_info = cfg.GetClusterInfo()
13095
    # cluster data
13096
    data = {
13097
      "version": constants.IALLOCATOR_VERSION,
13098
      "cluster_name": cfg.GetClusterName(),
13099
      "cluster_tags": list(cluster_info.GetTags()),
13100
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13101
      # we don't have job IDs
13102
      }
13103
    ninfo = cfg.GetAllNodesInfo()
13104
    iinfo = cfg.GetAllInstancesInfo().values()
13105
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13106

    
13107
    # node data
13108
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
13109

    
13110
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13111
      hypervisor_name = self.hypervisor
13112
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13113
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13114
    else:
13115
      hypervisor_name = cluster_info.enabled_hypervisors[0]
13116

    
13117
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13118
                                        hypervisor_name)
13119
    node_iinfo = \
13120
      self.rpc.call_all_instances_info(node_list,
13121
                                       cluster_info.enabled_hypervisors)
13122

    
13123
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13124

    
13125
    config_ndata = self._ComputeBasicNodeData(ninfo)
13126
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13127
                                                 i_list, config_ndata)
13128
    assert len(data["nodes"]) == len(ninfo), \
13129
        "Incomplete node data computed"
13130

    
13131
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13132

    
13133
    self.in_data = data
13134

    
13135
  @staticmethod
13136
  def _ComputeNodeGroupData(cfg):
13137
    """Compute node groups data.
13138

13139
    """
13140
    ng = dict((guuid, {
13141
      "name": gdata.name,
13142
      "alloc_policy": gdata.alloc_policy,
13143
      })
13144
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13145

    
13146
    return ng
13147

    
13148
  @staticmethod
13149
  def _ComputeBasicNodeData(node_cfg):
13150
    """Compute global node data.
13151

13152
    @rtype: dict
13153
    @returns: a dict of name: (node dict, node config)
13154

13155
    """
13156
    # fill in static (config-based) values
13157
    node_results = dict((ninfo.name, {
13158
      "tags": list(ninfo.GetTags()),
13159
      "primary_ip": ninfo.primary_ip,
13160
      "secondary_ip": ninfo.secondary_ip,
13161
      "offline": ninfo.offline,
13162
      "drained": ninfo.drained,
13163
      "master_candidate": ninfo.master_candidate,
13164
      "group": ninfo.group,
13165
      "master_capable": ninfo.master_capable,
13166
      "vm_capable": ninfo.vm_capable,
13167
      })
13168
      for ninfo in node_cfg.values())
13169

    
13170
    return node_results
13171

    
13172
  @staticmethod
13173
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13174
                              node_results):
13175
    """Compute global node data.
13176

13177
    @param node_results: the basic node structures as filled from the config
13178

13179
    """
13180
    # make a copy of the current dict
13181
    node_results = dict(node_results)
13182
    for nname, nresult in node_data.items():
13183
      assert nname in node_results, "Missing basic data for node %s" % nname
13184
      ninfo = node_cfg[nname]
13185

    
13186
      if not (ninfo.offline or ninfo.drained):
13187
        nresult.Raise("Can't get data for node %s" % nname)
13188
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13189
                                nname)
13190
        remote_info = nresult.payload
13191

    
13192
        for attr in ["memory_total", "memory_free", "memory_dom0",
13193
                     "vg_size", "vg_free", "cpu_total"]:
13194
          if attr not in remote_info:
13195
            raise errors.OpExecError("Node '%s' didn't return attribute"
13196
                                     " '%s'" % (nname, attr))
13197
          if not isinstance(remote_info[attr], int):
13198
            raise errors.OpExecError("Node '%s' returned invalid value"
13199
                                     " for '%s': %s" %
13200
                                     (nname, attr, remote_info[attr]))
13201
        # compute memory used by primary instances
13202
        i_p_mem = i_p_up_mem = 0
13203
        for iinfo, beinfo in i_list:
13204
          if iinfo.primary_node == nname:
13205
            i_p_mem += beinfo[constants.BE_MEMORY]
13206
            if iinfo.name not in node_iinfo[nname].payload:
13207
              i_used_mem = 0
13208
            else:
13209
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13210
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13211
            remote_info["memory_free"] -= max(0, i_mem_diff)
13212

    
13213
            if iinfo.admin_up:
13214
              i_p_up_mem += beinfo[constants.BE_MEMORY]
13215

    
13216
        # compute memory used by instances
13217
        pnr_dyn = {
13218
          "total_memory": remote_info["memory_total"],
13219
          "reserved_memory": remote_info["memory_dom0"],
13220
          "free_memory": remote_info["memory_free"],
13221
          "total_disk": remote_info["vg_size"],
13222
          "free_disk": remote_info["vg_free"],
13223
          "total_cpus": remote_info["cpu_total"],
13224
          "i_pri_memory": i_p_mem,
13225
          "i_pri_up_memory": i_p_up_mem,
13226
          }
13227
        pnr_dyn.update(node_results[nname])
13228
        node_results[nname] = pnr_dyn
13229

    
13230
    return node_results
13231

    
13232
  @staticmethod
13233
  def _ComputeInstanceData(cluster_info, i_list):
13234
    """Compute global instance data.
13235

13236
    """
13237
    instance_data = {}
13238
    for iinfo, beinfo in i_list:
13239
      nic_data = []
13240
      for nic in iinfo.nics:
13241
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13242
        nic_dict = {
13243
          "mac": nic.mac,
13244
          "ip": nic.ip,
13245
          "mode": filled_params[constants.NIC_MODE],
13246
          "link": filled_params[constants.NIC_LINK],
13247
          }
13248
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13249
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13250
        nic_data.append(nic_dict)
13251
      pir = {
13252
        "tags": list(iinfo.GetTags()),
13253
        "admin_up": iinfo.admin_up,
13254
        "vcpus": beinfo[constants.BE_VCPUS],
13255
        "memory": beinfo[constants.BE_MEMORY],
13256
        "os": iinfo.os,
13257
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13258
        "nics": nic_data,
13259
        "disks": [{constants.IDISK_SIZE: dsk.size,
13260
                   constants.IDISK_MODE: dsk.mode}
13261
                  for dsk in iinfo.disks],
13262
        "disk_template": iinfo.disk_template,
13263
        "hypervisor": iinfo.hypervisor,
13264
        }
13265
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13266
                                                 pir["disks"])
13267
      instance_data[iinfo.name] = pir
13268

    
13269
    return instance_data
13270

    
13271
  def _AddNewInstance(self):
13272
    """Add new instance data to allocator structure.
13273

13274
    This in combination with _AllocatorGetClusterData will create the
13275
    correct structure needed as input for the allocator.
13276

13277
    The checks for the completeness of the opcode must have already been
13278
    done.
13279

13280
    """
13281
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13282

    
13283
    if self.disk_template in constants.DTS_INT_MIRROR:
13284
      self.required_nodes = 2
13285
    else:
13286
      self.required_nodes = 1
13287

    
13288
    request = {
13289
      "name": self.name,
13290
      "disk_template": self.disk_template,
13291
      "tags": self.tags,
13292
      "os": self.os,
13293
      "vcpus": self.vcpus,
13294
      "memory": self.memory,
13295
      "disks": self.disks,
13296
      "disk_space_total": disk_space,
13297
      "nics": self.nics,
13298
      "required_nodes": self.required_nodes,
13299
      "hypervisor": self.hypervisor,
13300
      }
13301

    
13302
    return request
13303

    
13304
  def _AddRelocateInstance(self):
13305
    """Add relocate instance data to allocator structure.
13306

13307
    This in combination with _IAllocatorGetClusterData will create the
13308
    correct structure needed as input for the allocator.
13309

13310
    The checks for the completeness of the opcode must have already been
13311
    done.
13312

13313
    """
13314
    instance = self.cfg.GetInstanceInfo(self.name)
13315
    if instance is None:
13316
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13317
                                   " IAllocator" % self.name)
13318

    
13319
    if instance.disk_template not in constants.DTS_MIRRORED:
13320
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13321
                                 errors.ECODE_INVAL)
13322

    
13323
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13324
        len(instance.secondary_nodes) != 1:
13325
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13326
                                 errors.ECODE_STATE)
13327

    
13328
    self.required_nodes = 1
13329
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13330
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13331

    
13332
    request = {
13333
      "name": self.name,
13334
      "disk_space_total": disk_space,
13335
      "required_nodes": self.required_nodes,
13336
      "relocate_from": self.relocate_from,
13337
      }
13338
    return request
13339

    
13340
  def _AddNodeEvacuate(self):
13341
    """Get data for node-evacuate requests.
13342

13343
    """
13344
    return {
13345
      "instances": self.instances,
13346
      "evac_mode": self.evac_mode,
13347
      }
13348

    
13349
  def _AddChangeGroup(self):
13350
    """Get data for node-evacuate requests.
13351

13352
    """
13353
    return {
13354
      "instances": self.instances,
13355
      "target_groups": self.target_groups,
13356
      }
13357

    
13358
  def _BuildInputData(self, fn, keydata):
13359
    """Build input data structures.
13360

13361
    """
13362
    self._ComputeClusterData()
13363

    
13364
    request = fn()
13365
    request["type"] = self.mode
13366
    for keyname, keytype in keydata:
13367
      if keyname not in request:
13368
        raise errors.ProgrammerError("Request parameter %s is missing" %
13369
                                     keyname)
13370
      val = request[keyname]
13371
      if not keytype(val):
13372
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13373
                                     " validation, value %s, expected"
13374
                                     " type %s" % (keyname, val, keytype))
13375
    self.in_data["request"] = request
13376

    
13377
    self.in_text = serializer.Dump(self.in_data)
13378

    
13379
  _STRING_LIST = ht.TListOf(ht.TString)
13380
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13381
     # pylint: disable=E1101
13382
     # Class '...' has no 'OP_ID' member
13383
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13384
                          opcodes.OpInstanceMigrate.OP_ID,
13385
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13386
     })))
13387

    
13388
  _NEVAC_MOVED = \
13389
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13390
                       ht.TItems([ht.TNonEmptyString,
13391
                                  ht.TNonEmptyString,
13392
                                  ht.TListOf(ht.TNonEmptyString),
13393
                                 ])))
13394
  _NEVAC_FAILED = \
13395
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13396
                       ht.TItems([ht.TNonEmptyString,
13397
                                  ht.TMaybeString,
13398
                                 ])))
13399
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13400
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13401

    
13402
  _MODE_DATA = {
13403
    constants.IALLOCATOR_MODE_ALLOC:
13404
      (_AddNewInstance,
13405
       [
13406
        ("name", ht.TString),
13407
        ("memory", ht.TInt),
13408
        ("disks", ht.TListOf(ht.TDict)),
13409
        ("disk_template", ht.TString),
13410
        ("os", ht.TString),
13411
        ("tags", _STRING_LIST),
13412
        ("nics", ht.TListOf(ht.TDict)),
13413
        ("vcpus", ht.TInt),
13414
        ("hypervisor", ht.TString),
13415
        ], ht.TList),
13416
    constants.IALLOCATOR_MODE_RELOC:
13417
      (_AddRelocateInstance,
13418
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13419
       ht.TList),
13420
     constants.IALLOCATOR_MODE_NODE_EVAC:
13421
      (_AddNodeEvacuate, [
13422
        ("instances", _STRING_LIST),
13423
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13424
        ], _NEVAC_RESULT),
13425
     constants.IALLOCATOR_MODE_CHG_GROUP:
13426
      (_AddChangeGroup, [
13427
        ("instances", _STRING_LIST),
13428
        ("target_groups", _STRING_LIST),
13429
        ], _NEVAC_RESULT),
13430
    }
13431

    
13432
  def Run(self, name, validate=True, call_fn=None):
13433
    """Run an instance allocator and return the results.
13434

13435
    """
13436
    if call_fn is None:
13437
      call_fn = self.rpc.call_iallocator_runner
13438

    
13439
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13440
    result.Raise("Failure while running the iallocator script")
13441

    
13442
    self.out_text = result.payload
13443
    if validate:
13444
      self._ValidateResult()
13445

    
13446
  def _ValidateResult(self):
13447
    """Process the allocator results.
13448

13449
    This will process and if successful save the result in
13450
    self.out_data and the other parameters.
13451

13452
    """
13453
    try:
13454
      rdict = serializer.Load(self.out_text)
13455
    except Exception, err:
13456
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13457

    
13458
    if not isinstance(rdict, dict):
13459
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
13460

    
13461
    # TODO: remove backwards compatiblity in later versions
13462
    if "nodes" in rdict and "result" not in rdict:
13463
      rdict["result"] = rdict["nodes"]
13464
      del rdict["nodes"]
13465

    
13466
    for key in "success", "info", "result":
13467
      if key not in rdict:
13468
        raise errors.OpExecError("Can't parse iallocator results:"
13469
                                 " missing key '%s'" % key)
13470
      setattr(self, key, rdict[key])
13471

    
13472
    if not self._result_check(self.result):
13473
      raise errors.OpExecError("Iallocator returned invalid result,"
13474
                               " expected %s, got %s" %
13475
                               (self._result_check, self.result),
13476
                               errors.ECODE_INVAL)
13477

    
13478
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
13479
      assert self.relocate_from is not None
13480
      assert self.required_nodes == 1
13481

    
13482
      node2group = dict((name, ndata["group"])
13483
                        for (name, ndata) in self.in_data["nodes"].items())
13484

    
13485
      fn = compat.partial(self._NodesToGroups, node2group,
13486
                          self.in_data["nodegroups"])
13487

    
13488
      instance = self.cfg.GetInstanceInfo(self.name)
13489
      request_groups = fn(self.relocate_from + [instance.primary_node])
13490
      result_groups = fn(rdict["result"] + [instance.primary_node])
13491

    
13492
      if self.success and not set(result_groups).issubset(request_groups):
13493
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13494
                                 " differ from original groups (%s)" %
13495
                                 (utils.CommaJoin(result_groups),
13496
                                  utils.CommaJoin(request_groups)))
13497

    
13498
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13499
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13500

    
13501
    self.out_data = rdict
13502

    
13503
  @staticmethod
13504
  def _NodesToGroups(node2group, groups, nodes):
13505
    """Returns a list of unique group names for a list of nodes.
13506

13507
    @type node2group: dict
13508
    @param node2group: Map from node name to group UUID
13509
    @type groups: dict
13510
    @param groups: Group information
13511
    @type nodes: list
13512
    @param nodes: Node names
13513

13514
    """
13515
    result = set()
13516

    
13517
    for node in nodes:
13518
      try:
13519
        group_uuid = node2group[node]
13520
      except KeyError:
13521
        # Ignore unknown node
13522
        pass
13523
      else:
13524
        try:
13525
          group = groups[group_uuid]
13526
        except KeyError:
13527
          # Can't find group, let's use UUID
13528
          group_name = group_uuid
13529
        else:
13530
          group_name = group["name"]
13531

    
13532
        result.add(group_name)
13533

    
13534
    return sorted(result)
13535

    
13536

    
13537
class LUTestAllocator(NoHooksLU):
13538
  """Run allocator tests.
13539

13540
  This LU runs the allocator tests
13541

13542
  """
13543
  def CheckPrereq(self):
13544
    """Check prerequisites.
13545

13546
    This checks the opcode parameters depending on the director and mode test.
13547

13548
    """
13549
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
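      # Each row is expected to look roughly like this hypothetical example
      # (the mode value must be a member of constants.DISK_ACCESS_SET,
      # e.g. "rw"):
      #   {constants.IDISK_SIZE: 1024, constants.IDISK_MODE: "rw"}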
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

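    # IALLOCATOR_DIR_IN merely returns the request text that would be sent to
    # the allocator; IALLOCATOR_DIR_OUT runs the named allocator script and
    # returns its raw, unvalidated answer.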
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
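

# A minimal usage sketch (illustrative only, not part of the original module):
#   _GetQueryImplementation(constants.QR_NODE) is _NodeQuery    # => True
#   _GetQueryImplementation("no-such-resource")  # raises errors.OpPrereqError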