lib/cmdlib.py @ revision 1986b21f
1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62
from ganeti import rpc
63

    
64
import ganeti.masterd.instance # pylint: disable=W0611
65

    
66

    
67
#: Size of DRBD meta block device
68
DRBD_META_SIZE = 128
69

    
70

    
71
class ResultWithJobs:
72
  """Data container for LU results with jobs.
73

74
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
75
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
76
  contained in the C{jobs} attribute and include the job IDs in the opcode
77
  result.
78

79
  """
80
  def __init__(self, jobs, **kwargs):
81
    """Initializes this class.
82

83
    Additional return values can be specified as keyword arguments.
84

85
    @type jobs: list of lists of L{opcodes.OpCode}
86
    @param jobs: A list of lists of opcode objects
87

88
    """
89
    self.jobs = jobs
90
    self.other = kwargs
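# Example (illustrative sketch, not part of the original module): an LU's Exec
# method can hand follow-up work to the master daemon by returning a
# ResultWithJobs instead of a plain value; each inner list becomes one job and
# extra keyword arguments end up in the "other" dict. The opcode and keyword
# below are placeholders.
#
#   def Exec(self, feedback_fn):
#     # ... do the LU's own work first ...
#     return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]],
#                           cleaned_up=True)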
91

    
92

    
93
class LogicalUnit(object):
94
  """Logical Unit base class.
95

96
  Subclasses must follow these rules:
97
    - implement ExpandNames
98
    - implement CheckPrereq (except when tasklets are used)
99
    - implement Exec (except when tasklets are used)
100
    - implement BuildHooksEnv
101
    - implement BuildHooksNodes
102
    - redefine HPATH and HTYPE
103
    - optionally redefine their run requirements:
104
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
105

106
  Note that all commands require root permissions.
107

108
  @ivar dry_run_result: the value (if any) that will be returned to the caller
109
      in dry-run mode (signalled by opcode dry_run parameter)
110

111
  """
112
  HPATH = None
113
  HTYPE = None
114
  REQ_BGL = True
115

    
116
  def __init__(self, processor, op, context, rpc_runner):
117
    """Constructor for LogicalUnit.
118

119
    This needs to be overridden in derived classes in order to check op
120
    validity.
121

122
    """
123
    self.proc = processor
124
    self.op = op
125
    self.cfg = context.cfg
126
    self.glm = context.glm
127
    # readability alias
128
    self.owned_locks = context.glm.list_owned
129
    self.context = context
130
    self.rpc = rpc_runner
131
    # Dicts used to declare locking needs to mcpu
132
    self.needed_locks = None
133
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
134
    self.add_locks = {}
135
    self.remove_locks = {}
136
    # Used to force good behavior when calling helper functions
137
    self.recalculate_locks = {}
138
    # logging
139
    self.Log = processor.Log # pylint: disable=C0103
140
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
141
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
142
    self.LogStep = processor.LogStep # pylint: disable=C0103
143
    # support for dry-run
144
    self.dry_run_result = None
145
    # support for generic debug attribute
146
    if (not hasattr(self.op, "debug_level") or
147
        not isinstance(self.op.debug_level, int)):
148
      self.op.debug_level = 0
149

    
150
    # Tasklets
151
    self.tasklets = None
152

    
153
    # Validate opcode parameters and set defaults
154
    self.op.Validate(True)
155

    
156
    self.CheckArguments()
157

    
158
  def CheckArguments(self):
159
    """Check syntactic validity for the opcode arguments.
160

161
    This method is for doing a simple syntactic check and ensure
162
    validity of opcode parameters, without any cluster-related
163
    checks. While the same can be accomplished in ExpandNames and/or
164
    CheckPrereq, doing these separate is better because:
165

166
      - ExpandNames is left as purely a lock-related function
167
      - CheckPrereq is run after we have acquired locks (and possibly
168
        waited for them)
169

170
    The function is allowed to change the self.op attribute so that
171
    later methods no longer need to worry about missing parameters.
172

173
    """
174
    pass
175

    
176
  def ExpandNames(self):
177
    """Expand names for this LU.
178

179
    This method is called before starting to execute the opcode, and it should
180
    update all the parameters of the opcode to their canonical form (e.g. a
181
    short node name must be fully expanded after this method has successfully
182
    completed). This way locking, hooks, logging, etc. can work correctly.
183

184
    LUs which implement this method must also populate the self.needed_locks
185
    member, as a dict with lock levels as keys, and a list of needed lock names
186
    as values. Rules:
187

188
      - use an empty dict if you don't need any lock
189
      - if you don't need any lock at a particular level omit that level
190
      - don't put anything for the BGL level
191
      - if you want all locks at a level use locking.ALL_SET as a value
192

193
    If you need to share locks (rather than acquire them exclusively) at one
194
    level you can modify self.share_locks, setting a true value (usually 1) for
195
    that level. By default locks are not shared.
196

197
    This function can also define a list of tasklets, which then will be
198
    executed in order instead of the usual LU-level CheckPrereq and Exec
199
    functions, if those are not defined by the LU.
200

201
    Examples::
202

203
      # Acquire all nodes and one instance
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: locking.ALL_SET,
206
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
207
      }
208
      # Acquire just two nodes
209
      self.needed_locks = {
210
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
211
      }
212
      # Acquire no locks
213
      self.needed_locks = {} # No, you can't leave it to the default value None
214

215
    """
216
    # The implementation of this method is mandatory only if the new LU is
217
    # concurrent, so that old LUs don't need to be changed all at the same
218
    # time.
219
    if self.REQ_BGL:
220
      self.needed_locks = {} # Exclusive LUs don't need locks.
221
    else:
222
      raise NotImplementedError
223

    
224
  def DeclareLocks(self, level):
225
    """Declare LU locking needs for a level
226

227
    While most LUs can just declare their locking needs at ExpandNames time,
228
    sometimes there's the need to calculate some locks after having acquired
229
    the ones before. This function is called just before acquiring locks at a
230
    particular level, but after acquiring the ones at lower levels, and permits
231
    such calculations. It can be used to modify self.needed_locks, and by
232
    default it does nothing.
233

234
    This function is only called if you have something already set in
235
    self.needed_locks for the level.
236

237
    @param level: Locking level which is going to be locked
238
    @type level: member of ganeti.locking.LEVELS
239

240
    """
241

    
242
  def CheckPrereq(self):
243
    """Check prerequisites for this LU.
244

245
    This method should check that the prerequisites for the execution
246
    of this LU are fulfilled. It can do internode communication, but
247
    it should be idempotent - no cluster or system changes are
248
    allowed.
249

250
    The method should raise errors.OpPrereqError in case something is
251
    not fulfilled. Its return value is ignored.
252

253
    This method should also update all the parameters of the opcode to
254
    their canonical form if it hasn't been done by ExpandNames before.
255

256
    """
257
    if self.tasklets is not None:
258
      for (idx, tl) in enumerate(self.tasklets):
259
        logging.debug("Checking prerequisites for tasklet %s/%s",
260
                      idx + 1, len(self.tasklets))
261
        tl.CheckPrereq()
262
    else:
263
      pass
264

    
265
  def Exec(self, feedback_fn):
266
    """Execute the LU.
267

268
    This method should implement the actual work. It should raise
269
    errors.OpExecError for failures that are somewhat dealt with in
270
    code, or expected.
271

272
    """
273
    if self.tasklets is not None:
274
      for (idx, tl) in enumerate(self.tasklets):
275
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
276
        tl.Exec(feedback_fn)
277
    else:
278
      raise NotImplementedError
279

    
280
  def BuildHooksEnv(self):
281
    """Build hooks environment for this LU.
282

283
    @rtype: dict
284
    @return: Dictionary containing the environment that will be used for
285
      running the hooks for this LU. The keys of the dict must not be prefixed
286
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
287
      will extend the environment with additional variables. If no environment
288
      should be defined, an empty dictionary should be returned (not C{None}).
289
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
290
      will not be called.
291

292
    """
293
    raise NotImplementedError
294

    
295
  def BuildHooksNodes(self):
296
    """Build list of nodes to run LU's hooks.
297

298
    @rtype: tuple; (list, list)
299
    @return: Tuple containing a list of node names on which the hook
300
      should run before the execution and a list of node names on which the
301
      hook should run after the execution. If there are no nodes, return an
302
      empty list (not None).
303
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
304
      will not be called.
305

306
    """
307
    raise NotImplementedError
308

    
309
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
310
    """Notify the LU about the results of its hooks.
311

312
    This method is called every time a hooks phase is executed, and notifies
313
    the Logical Unit about the hooks' result. The LU can then use it to alter
314
    its result based on the hooks.  By default the method does nothing and the
315
    previous result is passed back unchanged but any LU can define it if it
316
    wants to use the local cluster hook-scripts somehow.
317

318
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
319
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
320
    @param hook_results: the results of the multi-node hooks rpc call
321
    @param feedback_fn: function used to send feedback back to the caller
322
    @param lu_result: the previous Exec result this LU had, or None
323
        in the PRE phase
324
    @return: the new Exec result, based on the previous result
325
        and hook results
326

327
    """
328
    # API must be kept, thus we ignore the "unused argument" and "could
329
    # be a function" warnings
330
    # pylint: disable=W0613,R0201
331
    return lu_result
332

    
333
  def _ExpandAndLockInstance(self):
334
    """Helper function to expand and lock an instance.
335

336
    Many LUs that work on an instance take its name in self.op.instance_name
337
    and need to expand it and then declare the expanded name for locking. This
338
    function does it, and then updates self.op.instance_name to the expanded
339
    name. It also initializes needed_locks as a dict, if this hasn't been done
340
    before.
341

342
    """
343
    if self.needed_locks is None:
344
      self.needed_locks = {}
345
    else:
346
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
347
        "_ExpandAndLockInstance called with instance-level locks set"
348
    self.op.instance_name = _ExpandInstanceName(self.cfg,
349
                                                self.op.instance_name)
350
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
351

    
352
  def _LockInstancesNodes(self, primary_only=False,
353
                          level=locking.LEVEL_NODE):
354
    """Helper function to declare instances' nodes for locking.
355

356
    This function should be called after locking one or more instances to lock
357
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
358
    with all primary or secondary nodes for instances already locked and
359
    present in self.needed_locks[locking.LEVEL_INSTANCE].
360

361
    It should be called from DeclareLocks, and for safety only works if
362
    self.recalculate_locks[locking.LEVEL_NODE] is set.
363

364
    In the future it may grow parameters to just lock some instance's nodes, or
365
    to lock just primary or secondary nodes, if needed.
366

367
    It should be called in DeclareLocks in a way similar to::
368

369
      if level == locking.LEVEL_NODE:
370
        self._LockInstancesNodes()
371

372
    @type primary_only: boolean
373
    @param primary_only: only lock primary nodes of locked instances
374
    @param level: Which lock level to use for locking nodes
375

376
    """
377
    assert level in self.recalculate_locks, \
378
      "_LockInstancesNodes helper function called with no nodes to recalculate"
379

    
380
    # TODO: check if we've really been called with the instance locks held
381

    
382
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
383
    # future we might want to have different behaviors depending on the value
384
    # of self.recalculate_locks[locking.LEVEL_NODE]
385
    wanted_nodes = []
386
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
387
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
388
      wanted_nodes.append(instance.primary_node)
389
      if not primary_only:
390
        wanted_nodes.extend(instance.secondary_nodes)
391

    
392
    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
393
      self.needed_locks[level] = wanted_nodes
394
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
395
      self.needed_locks[level].extend(wanted_nodes)
396
    else:
397
      raise errors.ProgrammerError("Unknown recalculation mode")
398

    
399
    del self.recalculate_locks[level]
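# Example (illustrative sketch, not part of the original code): how the two
# locking helpers above are typically combined in an instance-level LU. The
# LU class name is hypothetical; the attributes used are the ones documented
# in the docstrings above.
#
#   class LUInstanceDoSomething(LogicalUnit):
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#     def DeclareLocks(self, level):
#       if level == locking.LEVEL_NODE:
#         self._LockInstancesNodes()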
400

    
401

    
402
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
403
  """Simple LU which runs no hooks.
404

405
  This LU is intended as a parent for other LogicalUnits which will
406
  run no hooks, in order to reduce duplicate code.
407

408
  """
409
  HPATH = None
410
  HTYPE = None
411

    
412
  def BuildHooksEnv(self):
413
    """Empty BuildHooksEnv for NoHooksLu.
414

415
    This just raises an error.
416

417
    """
418
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
419

    
420
  def BuildHooksNodes(self):
421
    """Empty BuildHooksNodes for NoHooksLU.
422

423
    """
424
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
425

    
426

    
427
class Tasklet:
428
  """Tasklet base class.
429

430
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
431
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
432
  tasklets know nothing about locks.
433

434
  Subclasses must follow these rules:
435
    - Implement CheckPrereq
436
    - Implement Exec
437

438
  """
439
  def __init__(self, lu):
440
    self.lu = lu
441

    
442
    # Shortcuts
443
    self.cfg = lu.cfg
444
    self.rpc = lu.rpc
445

    
446
  def CheckPrereq(self):
447
    """Check prerequisites for this tasklets.
448

449
    This method should check whether the prerequisites for the execution of
450
    this tasklet are fulfilled. It can do internode communication, but it
451
    should be idempotent - no cluster or system changes are allowed.
452

453
    The method should raise errors.OpPrereqError in case something is not
454
    fulfilled. Its return value is ignored.
455

456
    This method should also update all parameters to their canonical form if it
457
    hasn't been done before.
458

459
    """
460
    pass
461

    
462
  def Exec(self, feedback_fn):
463
    """Execute the tasklet.
464

465
    This method should implement the actual work. It should raise
466
    errors.OpExecError for failures that are somewhat dealt with in code, or
467
    expected.
468

469
    """
470
    raise NotImplementedError
471

    
472

    
473
class _QueryBase:
474
  """Base for query utility classes.
475

476
  """
477
  #: Attribute holding field definitions
478
  FIELDS = None
479

    
480
  def __init__(self, qfilter, fields, use_locking):
481
    """Initializes this class.
482

483
    """
484
    self.use_locking = use_locking
485

    
486
    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
487
                             namefield="name")
488
    self.requested_data = self.query.RequestedData()
489
    self.names = self.query.RequestedNames()
490

    
491
    # Sort only if no names were requested
492
    self.sort_by_name = not self.names
493

    
494
    self.do_locking = None
495
    self.wanted = None
496

    
497
  def _GetNames(self, lu, all_names, lock_level):
498
    """Helper function to determine names asked for in the query.
499

500
    """
501
    if self.do_locking:
502
      names = lu.owned_locks(lock_level)
503
    else:
504
      names = all_names
505

    
506
    if self.wanted == locking.ALL_SET:
507
      assert not self.names
508
      # caller didn't specify names, so ordering is not important
509
      return utils.NiceSort(names)
510

    
511
    # caller specified names and we must keep the same order
512
    assert self.names
513
    assert not self.do_locking or lu.glm.is_owned(lock_level)
514

    
515
    missing = set(self.wanted).difference(names)
516
    if missing:
517
      raise errors.OpExecError("Some items were removed before retrieving"
518
                               " their data: %s" % missing)
519

    
520
    # Return expanded names
521
    return self.wanted
522

    
523
  def ExpandNames(self, lu):
524
    """Expand names for this query.
525

526
    See L{LogicalUnit.ExpandNames}.
527

528
    """
529
    raise NotImplementedError()
530

    
531
  def DeclareLocks(self, lu, level):
532
    """Declare locks for this query.
533

534
    See L{LogicalUnit.DeclareLocks}.
535

536
    """
537
    raise NotImplementedError()
538

    
539
  def _GetQueryData(self, lu):
540
    """Collects all data for this query.
541

542
    @return: Query data object
543

544
    """
545
    raise NotImplementedError()
546

    
547
  def NewStyleQuery(self, lu):
548
    """Collect data and execute query.
549

550
    """
551
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
552
                                  sort_by_name=self.sort_by_name)
553

    
554
  def OldStyleQuery(self, lu):
555
    """Collect data and execute query.
556

557
    """
558
    return self.query.OldStyleQuery(self._GetQueryData(lu),
559
                                    sort_by_name=self.sort_by_name)
560

    
561

    
562
def _ShareAll():
563
  """Returns a dict declaring all lock levels shared.
564

565
  """
566
  return dict.fromkeys(locking.LEVELS, 1)
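# Example (illustrative sketch): a non-BGL LU that wants every lock level
# acquired in shared rather than exclusive mode can simply do, in its
# ExpandNames:
#
#   self.share_locks = _ShareAll()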
567

    
568

    
569
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
570
  """Checks if the owned node groups are still correct for an instance.
571

572
  @type cfg: L{config.ConfigWriter}
573
  @param cfg: The cluster configuration
574
  @type instance_name: string
575
  @param instance_name: Instance name
576
  @type owned_groups: set or frozenset
577
  @param owned_groups: List of currently owned node groups
578

579
  """
580
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
581

    
582
  if not owned_groups.issuperset(inst_groups):
583
    raise errors.OpPrereqError("Instance %s's node groups changed since"
584
                               " locks were acquired, current groups are"
585
                               " are '%s', owning groups '%s'; retry the"
586
                               " operation" %
587
                               (instance_name,
588
                                utils.CommaJoin(inst_groups),
589
                                utils.CommaJoin(owned_groups)),
590
                               errors.ECODE_STATE)
591

    
592
  return inst_groups
593

    
594

    
595
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
596
  """Checks if the instances in a node group are still correct.
597

598
  @type cfg: L{config.ConfigWriter}
599
  @param cfg: The cluster configuration
600
  @type group_uuid: string
601
  @param group_uuid: Node group UUID
602
  @type owned_instances: set or frozenset
603
  @param owned_instances: List of currently owned instances
604

605
  """
606
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
607
  if owned_instances != wanted_instances:
608
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
609
                               " locks were acquired, wanted '%s', have '%s';"
610
                               " retry the operation" %
611
                               (group_uuid,
612
                                utils.CommaJoin(wanted_instances),
613
                                utils.CommaJoin(owned_instances)),
614
                               errors.ECODE_STATE)
615

    
616
  return wanted_instances
617

    
618

    
619
def _SupportsOob(cfg, node):
620
  """Tells if node supports OOB.
621

622
  @type cfg: L{config.ConfigWriter}
623
  @param cfg: The cluster configuration
624
  @type node: L{objects.Node}
625
  @param node: The node
626
  @return: The OOB script if supported or an empty string otherwise
627

628
  """
629
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
630

    
631

    
632
def _GetWantedNodes(lu, nodes):
633
  """Returns list of checked and expanded node names.
634

635
  @type lu: L{LogicalUnit}
636
  @param lu: the logical unit on whose behalf we execute
637
  @type nodes: list
638
  @param nodes: list of node names or None for all nodes
639
  @rtype: list
640
  @return: the list of nodes, sorted
641
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
642

643
  """
644
  if nodes:
645
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
646

    
647
  return utils.NiceSort(lu.cfg.GetNodeList())
648

    
649

    
650
def _GetWantedInstances(lu, instances):
651
  """Returns list of checked and expanded instance names.
652

653
  @type lu: L{LogicalUnit}
654
  @param lu: the logical unit on whose behalf we execute
655
  @type instances: list
656
  @param instances: list of instance names or None for all instances
657
  @rtype: list
658
  @return: the list of instances, sorted
659
  @raise errors.OpPrereqError: if the instances parameter is wrong type
660
  @raise errors.OpPrereqError: if any of the passed instances is not found
661

662
  """
663
  if instances:
664
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
665
  else:
666
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
667
  return wanted
668

    
669

    
670
def _GetUpdatedParams(old_params, update_dict,
671
                      use_default=True, use_none=False):
672
  """Return the new version of a parameter dictionary.
673

674
  @type old_params: dict
675
  @param old_params: old parameters
676
  @type update_dict: dict
677
  @param update_dict: dict containing new parameter values, or
678
      constants.VALUE_DEFAULT to reset the parameter to its default
679
      value
680
  @type use_default: boolean
681
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
682
      values as 'to be deleted' values
683
  @type use_none: boolean
684
  @param use_none: whether to recognise C{None} values as 'to be
685
      deleted' values
686
  @rtype: dict
687
  @return: the new parameter dictionary
688

689
  """
690
  params_copy = copy.deepcopy(old_params)
691
  for key, val in update_dict.iteritems():
692
    if ((use_default and val == constants.VALUE_DEFAULT) or
693
        (use_none and val is None)):
694
      try:
695
        del params_copy[key]
696
      except KeyError:
697
        pass
698
    else:
699
      params_copy[key] = val
700
  return params_copy
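# Example (illustrative sketch of the semantics above; keys and values are
# hypothetical):
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda1"}
#   upd = {"root_path": constants.VALUE_DEFAULT, "serial_console": True}
#   _GetUpdatedParams(old, upd)
#   # -> {"kernel_path": "/boot/vmlinuz", "serial_console": True}
#   # "root_path" is dropped (reset to its default), new keys are added,
#   # untouched keys are kept.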
701

    
702

    
703
def _ReleaseLocks(lu, level, names=None, keep=None):
704
  """Releases locks owned by an LU.
705

706
  @type lu: L{LogicalUnit}
707
  @param level: Lock level
708
  @type names: list or None
709
  @param names: Names of locks to release
710
  @type keep: list or None
711
  @param keep: Names of locks to retain
712

713
  """
714
  assert not (keep is not None and names is not None), \
715
         "Only one of the 'names' and the 'keep' parameters can be given"
716

    
717
  if names is not None:
718
    should_release = names.__contains__
719
  elif keep:
720
    should_release = lambda name: name not in keep
721
  else:
722
    should_release = None
723

    
724
  if should_release:
725
    retain = []
726
    release = []
727

    
728
    # Determine which locks to release
729
    for name in lu.owned_locks(level):
730
      if should_release(name):
731
        release.append(name)
732
      else:
733
        retain.append(name)
734

    
735
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
736

    
737
    # Release just some locks
738
    lu.glm.release(level, names=release)
739

    
740
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
741
  else:
742
    # Release everything
743
    lu.glm.release(level)
744

    
745
    assert not lu.glm.is_owned(level), "No locks should be owned"
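# Example (illustrative sketch): after narrowing its work down to one node, an
# LU can drop the node locks it no longer needs (the node name is
# hypothetical):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=["node1.example.com"])
#   # or release everything held at that level:
#   _ReleaseLocks(self, locking.LEVEL_NODE)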
746

    
747

    
748
def _MapInstanceDisksToNodes(instances):
749
  """Creates a map from (node, volume) to instance name.
750

751
  @type instances: list of L{objects.Instance}
752
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
753

754
  """
755
  return dict(((node, vol), inst.name)
756
              for inst in instances
757
              for (node, vols) in inst.MapLVsByNode().items()
758
              for vol in vols)
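# Example (illustrative sketch of the mapping shape; node and volume names are
# hypothetical):
#
#   _MapInstanceDisksToNodes([inst1, inst2])
#   # -> {("node1.example.com", "xenvg/disk0-lv"): "inst1.example.com",
#   #     ("node2.example.com", "xenvg/disk0-lv"): "inst2.example.com", ...}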
759

    
760

    
761
def _RunPostHook(lu, node_name):
762
  """Runs the post-hook for an opcode on a single node.
763

764
  """
765
  hm = lu.proc.BuildHooksManager(lu)
766
  try:
767
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
768
  except:
769
    # pylint: disable=W0702
770
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
771

    
772

    
773
def _CheckOutputFields(static, dynamic, selected):
774
  """Checks whether all selected fields are valid.
775

776
  @type static: L{utils.FieldSet}
777
  @param static: static fields set
778
  @type dynamic: L{utils.FieldSet}
779
  @param dynamic: dynamic fields set
780

781
  """
782
  f = utils.FieldSet()
783
  f.Extend(static)
784
  f.Extend(dynamic)
785

    
786
  delta = f.NonMatching(selected)
787
  if delta:
788
    raise errors.OpPrereqError("Unknown output fields selected: %s"
789
                               % ",".join(delta), errors.ECODE_INVAL)
790

    
791

    
792
def _CheckGlobalHvParams(params):
793
  """Validates that given hypervisor params are not global ones.
794

795
  This will ensure that instances don't get customised versions of
796
  global params.
797

798
  """
799
  used_globals = constants.HVC_GLOBALS.intersection(params)
800
  if used_globals:
801
    msg = ("The following hypervisor parameters are global and cannot"
802
           " be customized at instance level, please modify them at"
803
           " cluster level: %s" % utils.CommaJoin(used_globals))
804
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
805

    
806

    
807
def _CheckNodeOnline(lu, node, msg=None):
808
  """Ensure that a given node is online.
809

810
  @param lu: the LU on behalf of which we make the check
811
  @param node: the node to check
812
  @param msg: if passed, should be a message to replace the default one
813
  @raise errors.OpPrereqError: if the node is offline
814

815
  """
816
  if msg is None:
817
    msg = "Can't use offline node"
818
  if lu.cfg.GetNodeInfo(node).offline:
819
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
820

    
821

    
822
def _CheckNodeNotDrained(lu, node):
823
  """Ensure that a given node is not drained.
824

825
  @param lu: the LU on behalf of which we make the check
826
  @param node: the node to check
827
  @raise errors.OpPrereqError: if the node is drained
828

829
  """
830
  if lu.cfg.GetNodeInfo(node).drained:
831
    raise errors.OpPrereqError("Can't use drained node %s" % node,
832
                               errors.ECODE_STATE)
833

    
834

    
835
def _CheckNodeVmCapable(lu, node):
836
  """Ensure that a given node is vm capable.
837

838
  @param lu: the LU on behalf of which we make the check
839
  @param node: the node to check
840
  @raise errors.OpPrereqError: if the node is not vm capable
841

842
  """
843
  if not lu.cfg.GetNodeInfo(node).vm_capable:
844
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
845
                               errors.ECODE_STATE)
846

    
847

    
848
def _CheckNodeHasOS(lu, node, os_name, force_variant):
849
  """Ensure that a node supports a given OS.
850

851
  @param lu: the LU on behalf of which we make the check
852
  @param node: the node to check
853
  @param os_name: the OS to query about
854
  @param force_variant: whether to ignore variant errors
855
  @raise errors.OpPrereqError: if the node is not supporting the OS
856

857
  """
858
  result = lu.rpc.call_os_get(node, os_name)
859
  result.Raise("OS '%s' not in supported OS list for node %s" %
860
               (os_name, node),
861
               prereq=True, ecode=errors.ECODE_INVAL)
862
  if not force_variant:
863
    _CheckOSVariant(result.payload, os_name)
864

    
865

    
866
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
867
  """Ensure that a node has the given secondary ip.
868

869
  @type lu: L{LogicalUnit}
870
  @param lu: the LU on behalf of which we make the check
871
  @type node: string
872
  @param node: the node to check
873
  @type secondary_ip: string
874
  @param secondary_ip: the ip to check
875
  @type prereq: boolean
876
  @param prereq: whether to throw a prerequisite or an execute error
877
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
878
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
879

880
  """
881
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
882
  result.Raise("Failure checking secondary ip on node %s" % node,
883
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
884
  if not result.payload:
885
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
886
           " please fix and re-run this command" % secondary_ip)
887
    if prereq:
888
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
889
    else:
890
      raise errors.OpExecError(msg)
891

    
892

    
893
def _GetClusterDomainSecret():
894
  """Reads the cluster domain secret.
895

896
  """
897
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
898
                               strict=True)
899

    
900

    
901
def _CheckInstanceDown(lu, instance, reason):
902
  """Ensure that an instance is not running."""
903
  if instance.admin_up:
904
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
905
                               (instance.name, reason), errors.ECODE_STATE)
906

    
907
  pnode = instance.primary_node
908
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
909
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
910
              prereq=True, ecode=errors.ECODE_ENVIRON)
911

    
912
  if instance.name in ins_l.payload:
913
    raise errors.OpPrereqError("Instance %s is running, %s" %
914
                               (instance.name, reason), errors.ECODE_STATE)
915

    
916

    
917
def _ExpandItemName(fn, name, kind):
918
  """Expand an item name.
919

920
  @param fn: the function to use for expansion
921
  @param name: requested item name
922
  @param kind: text description ('Node' or 'Instance')
923
  @return: the resolved (full) name
924
  @raise errors.OpPrereqError: if the item is not found
925

926
  """
927
  full_name = fn(name)
928
  if full_name is None:
929
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
930
                               errors.ECODE_NOENT)
931
  return full_name
932

    
933

    
934
def _ExpandNodeName(cfg, name):
935
  """Wrapper over L{_ExpandItemName} for nodes."""
936
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
937

    
938

    
939
def _ExpandInstanceName(cfg, name):
940
  """Wrapper over L{_ExpandItemName} for instance."""
941
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
942

    
943

    
944
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
945
                          memory, vcpus, nics, disk_template, disks,
946
                          bep, hvp, hypervisor_name, tags):
947
  """Builds instance related env variables for hooks
948

949
  This builds the hook environment from individual variables.
950

951
  @type name: string
952
  @param name: the name of the instance
953
  @type primary_node: string
954
  @param primary_node: the name of the instance's primary node
955
  @type secondary_nodes: list
956
  @param secondary_nodes: list of secondary nodes as strings
957
  @type os_type: string
958
  @param os_type: the name of the instance's OS
959
  @type status: boolean
960
  @param status: the should_run status of the instance
961
  @type memory: string
962
  @param memory: the memory size of the instance
963
  @type vcpus: string
964
  @param vcpus: the count of VCPUs the instance has
965
  @type nics: list
966
  @param nics: list of tuples (ip, mac, mode, link) representing
967
      the NICs the instance has
968
  @type disk_template: string
969
  @param disk_template: the disk template of the instance
970
  @type disks: list
971
  @param disks: the list of (size, mode) pairs
972
  @type bep: dict
973
  @param bep: the backend parameters for the instance
974
  @type hvp: dict
975
  @param hvp: the hypervisor parameters for the instance
976
  @type hypervisor_name: string
977
  @param hypervisor_name: the hypervisor for the instance
978
  @type tags: list
979
  @param tags: list of instance tags as strings
980
  @rtype: dict
981
  @return: the hook environment for this instance
982

983
  """
984
  if status:
985
    str_status = "up"
986
  else:
987
    str_status = "down"
988
  env = {
989
    "OP_TARGET": name,
990
    "INSTANCE_NAME": name,
991
    "INSTANCE_PRIMARY": primary_node,
992
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
993
    "INSTANCE_OS_TYPE": os_type,
994
    "INSTANCE_STATUS": str_status,
995
    "INSTANCE_MEMORY": memory,
996
    "INSTANCE_VCPUS": vcpus,
997
    "INSTANCE_DISK_TEMPLATE": disk_template,
998
    "INSTANCE_HYPERVISOR": hypervisor_name,
999
  }
1000

    
1001
  if nics:
1002
    nic_count = len(nics)
1003
    for idx, (ip, mac, mode, link) in enumerate(nics):
1004
      if ip is None:
1005
        ip = ""
1006
      env["INSTANCE_NIC%d_IP" % idx] = ip
1007
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1008
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1009
      env["INSTANCE_NIC%d_LINK" % idx] = link
1010
      if mode == constants.NIC_MODE_BRIDGED:
1011
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1012
  else:
1013
    nic_count = 0
1014

    
1015
  env["INSTANCE_NIC_COUNT"] = nic_count
1016

    
1017
  if disks:
1018
    disk_count = len(disks)
1019
    for idx, (size, mode) in enumerate(disks):
1020
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1021
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1022
  else:
1023
    disk_count = 0
1024

    
1025
  env["INSTANCE_DISK_COUNT"] = disk_count
1026

    
1027
  if not tags:
1028
    tags = []
1029

    
1030
  env["INSTANCE_TAGS"] = " ".join(tags)
1031

    
1032
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1033
    for key, value in source.items():
1034
      env["INSTANCE_%s_%s" % (kind, key)] = value
1035

    
1036
  return env
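# Example (illustrative sketch): for an instance with one NIC and one disk the
# function above produces an environment along these lines (values are
# hypothetical):
#
#   {"OP_TARGET": "inst1.example.com", "INSTANCE_NAME": "inst1.example.com",
#    "INSTANCE_PRIMARY": "node1.example.com", "INSTANCE_SECONDARIES": "",
#    "INSTANCE_STATUS": "up", "INSTANCE_NIC_COUNT": 1,
#    "INSTANCE_NIC0_MAC": "aa:00:00:11:22:33", "INSTANCE_DISK_COUNT": 1,
#    "INSTANCE_DISK0_SIZE": 10240, "INSTANCE_DISK0_MODE": "rw", ...}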
1037

    
1038

    
1039
def _NICListToTuple(lu, nics):
1040
  """Build a list of nic information tuples.
1041

1042
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1043
  value in LUInstanceQueryData.
1044

1045
  @type lu:  L{LogicalUnit}
1046
  @param lu: the logical unit on whose behalf we execute
1047
  @type nics: list of L{objects.NIC}
1048
  @param nics: list of nics to convert to hooks tuples
1049

1050
  """
1051
  hooks_nics = []
1052
  cluster = lu.cfg.GetClusterInfo()
1053
  for nic in nics:
1054
    ip = nic.ip
1055
    mac = nic.mac
1056
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1057
    mode = filled_params[constants.NIC_MODE]
1058
    link = filled_params[constants.NIC_LINK]
1059
    hooks_nics.append((ip, mac, mode, link))
1060
  return hooks_nics
1061

    
1062

    
1063
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1064
  """Builds instance related env variables for hooks from an object.
1065

1066
  @type lu: L{LogicalUnit}
1067
  @param lu: the logical unit on whose behalf we execute
1068
  @type instance: L{objects.Instance}
1069
  @param instance: the instance for which we should build the
1070
      environment
1071
  @type override: dict
1072
  @param override: dictionary with key/values that will override
1073
      our values
1074
  @rtype: dict
1075
  @return: the hook environment dictionary
1076

1077
  """
1078
  cluster = lu.cfg.GetClusterInfo()
1079
  bep = cluster.FillBE(instance)
1080
  hvp = cluster.FillHV(instance)
1081
  args = {
1082
    "name": instance.name,
1083
    "primary_node": instance.primary_node,
1084
    "secondary_nodes": instance.secondary_nodes,
1085
    "os_type": instance.os,
1086
    "status": instance.admin_up,
1087
    "memory": bep[constants.BE_MEMORY],
1088
    "vcpus": bep[constants.BE_VCPUS],
1089
    "nics": _NICListToTuple(lu, instance.nics),
1090
    "disk_template": instance.disk_template,
1091
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1092
    "bep": bep,
1093
    "hvp": hvp,
1094
    "hypervisor_name": instance.hypervisor,
1095
    "tags": instance.tags,
1096
  }
1097
  if override:
1098
    args.update(override)
1099
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1100

    
1101

    
1102
def _AdjustCandidatePool(lu, exceptions):
1103
  """Adjust the candidate pool after node operations.
1104

1105
  """
1106
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1107
  if mod_list:
1108
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1109
               utils.CommaJoin(node.name for node in mod_list))
1110
    for name in mod_list:
1111
      lu.context.ReaddNode(name)
1112
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1113
  if mc_now > mc_max:
1114
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1115
               (mc_now, mc_max))
1116

    
1117

    
1118
def _DecideSelfPromotion(lu, exceptions=None):
1119
  """Decide whether I should promote myself as a master candidate.
1120

1121
  """
1122
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1123
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1124
  # the new node will increase mc_max by one, so:
1125
  mc_should = min(mc_should + 1, cp_size)
1126
  return mc_now < mc_should
1127

    
1128

    
1129
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1130
  """Check that the brigdes needed by a list of nics exist.
1131

1132
  """
1133
  cluster = lu.cfg.GetClusterInfo()
1134
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1135
  brlist = [params[constants.NIC_LINK] for params in paramslist
1136
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1137
  if brlist:
1138
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1139
    result.Raise("Error checking bridges on destination node '%s'" %
1140
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1141

    
1142

    
1143
def _CheckInstanceBridgesExist(lu, instance, node=None):
1144
  """Check that the brigdes needed by an instance exist.
1145

1146
  """
1147
  if node is None:
1148
    node = instance.primary_node
1149
  _CheckNicsBridgesExist(lu, instance.nics, node)
1150

    
1151

    
1152
def _CheckOSVariant(os_obj, name):
1153
  """Check whether an OS name conforms to the os variants specification.
1154

1155
  @type os_obj: L{objects.OS}
1156
  @param os_obj: OS object to check
1157
  @type name: string
1158
  @param name: OS name passed by the user, to check for validity
1159

1160
  """
1161
  variant = objects.OS.GetVariant(name)
1162
  if not os_obj.supported_variants:
1163
    if variant:
1164
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1165
                                 " passed)" % (os_obj.name, variant),
1166
                                 errors.ECODE_INVAL)
1167
    return
1168
  if not variant:
1169
    raise errors.OpPrereqError("OS name must include a variant",
1170
                               errors.ECODE_INVAL)
1171

    
1172
  if variant not in os_obj.supported_variants:
1173
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1174

    
1175

    
1176
def _GetNodeInstancesInner(cfg, fn):
1177
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1178

    
1179

    
1180
def _GetNodeInstances(cfg, node_name):
1181
  """Returns a list of all primary and secondary instances on a node.
1182

1183
  """
1184

    
1185
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1186

    
1187

    
1188
def _GetNodePrimaryInstances(cfg, node_name):
1189
  """Returns primary instances on a node.
1190

1191
  """
1192
  return _GetNodeInstancesInner(cfg,
1193
                                lambda inst: node_name == inst.primary_node)
1194

    
1195

    
1196
def _GetNodeSecondaryInstances(cfg, node_name):
1197
  """Returns secondary instances on a node.
1198

1199
  """
1200
  return _GetNodeInstancesInner(cfg,
1201
                                lambda inst: node_name in inst.secondary_nodes)
1202

    
1203

    
1204
def _GetStorageTypeArgs(cfg, storage_type):
1205
  """Returns the arguments for a storage type.
1206

1207
  """
1208
  # Special case for file storage
1209
  if storage_type == constants.ST_FILE:
1210
    # storage.FileStorage wants a list of storage directories
1211
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1212

    
1213
  return []
1214

    
1215

    
1216
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1217
  faulty = []
1218

    
1219
  for dev in instance.disks:
1220
    cfg.SetDiskID(dev, node_name)
1221

    
1222
  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1223
  result.Raise("Failed to get disk status from node %s" % node_name,
1224
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1225

    
1226
  for idx, bdev_status in enumerate(result.payload):
1227
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1228
      faulty.append(idx)
1229

    
1230
  return faulty
1231

    
1232

    
1233
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1234
  """Check the sanity of iallocator and node arguments and use the
1235
  cluster-wide iallocator if appropriate.
1236

1237
  Check that at most one of (iallocator, node) is specified. If none is
1238
  specified, then the LU's opcode's iallocator slot is filled with the
1239
  cluster-wide default iallocator.
1240

1241
  @type iallocator_slot: string
1242
  @param iallocator_slot: the name of the opcode iallocator slot
1243
  @type node_slot: string
1244
  @param node_slot: the name of the opcode target node slot
1245

1246
  """
1247
  node = getattr(lu.op, node_slot, None)
1248
  iallocator = getattr(lu.op, iallocator_slot, None)
1249

    
1250
  if node is not None and iallocator is not None:
1251
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1252
                               errors.ECODE_INVAL)
1253
  elif node is None and iallocator is None:
1254
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1255
    if default_iallocator:
1256
      setattr(lu.op, iallocator_slot, default_iallocator)
1257
    else:
1258
      raise errors.OpPrereqError("No iallocator or node given and no"
1259
                                 " cluster-wide default iallocator found;"
1260
                                 " please specify either an iallocator or a"
1261
                                 " node, or set a cluster-wide default"
1262
                                 " iallocator")
1263

    
1264

    
1265
def _GetDefaultIAllocator(cfg, iallocator):
1266
  """Decides on which iallocator to use.
1267

1268
  @type cfg: L{config.ConfigWriter}
1269
  @param cfg: Cluster configuration object
1270
  @type iallocator: string or None
1271
  @param iallocator: Iallocator specified in opcode
1272
  @rtype: string
1273
  @return: Iallocator name
1274

1275
  """
1276
  if not iallocator:
1277
    # Use default iallocator
1278
    iallocator = cfg.GetDefaultIAllocator()
1279

    
1280
  if not iallocator:
1281
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1282
                               " opcode nor as a cluster-wide default",
1283
                               errors.ECODE_INVAL)
1284

    
1285
  return iallocator
1286

    
1287

    
1288
class LUClusterPostInit(LogicalUnit):
1289
  """Logical unit for running hooks after cluster initialization.
1290

1291
  """
1292
  HPATH = "cluster-init"
1293
  HTYPE = constants.HTYPE_CLUSTER
1294

    
1295
  def BuildHooksEnv(self):
1296
    """Build hooks env.
1297

1298
    """
1299
    return {
1300
      "OP_TARGET": self.cfg.GetClusterName(),
1301
      }
1302

    
1303
  def BuildHooksNodes(self):
1304
    """Build hooks nodes.
1305

1306
    """
1307
    return ([], [self.cfg.GetMasterNode()])
1308

    
1309
  def Exec(self, feedback_fn):
1310
    """Nothing to do.
1311

1312
    """
1313
    return True
1314

    
1315

    
1316
class LUClusterDestroy(LogicalUnit):
1317
  """Logical unit for destroying the cluster.
1318

1319
  """
1320
  HPATH = "cluster-destroy"
1321
  HTYPE = constants.HTYPE_CLUSTER
1322

    
1323
  def BuildHooksEnv(self):
1324
    """Build hooks env.
1325

1326
    """
1327
    return {
1328
      "OP_TARGET": self.cfg.GetClusterName(),
1329
      }
1330

    
1331
  def BuildHooksNodes(self):
1332
    """Build hooks nodes.
1333

1334
    """
1335
    return ([], [])
1336

    
1337
  def CheckPrereq(self):
1338
    """Check prerequisites.
1339

1340
    This checks whether the cluster is empty.
1341

1342
    Any errors are signaled by raising errors.OpPrereqError.
1343

1344
    """
1345
    master = self.cfg.GetMasterNode()
1346

    
1347
    nodelist = self.cfg.GetNodeList()
1348
    if len(nodelist) != 1 or nodelist[0] != master:
1349
      raise errors.OpPrereqError("There are still %d node(s) in"
1350
                                 " this cluster." % (len(nodelist) - 1),
1351
                                 errors.ECODE_INVAL)
1352
    instancelist = self.cfg.GetInstanceList()
1353
    if instancelist:
1354
      raise errors.OpPrereqError("There are still %d instance(s) in"
1355
                                 " this cluster." % len(instancelist),
1356
                                 errors.ECODE_INVAL)
1357

    
1358
  def Exec(self, feedback_fn):
1359
    """Destroys the cluster.
1360

1361
    """
1362
    master_params = self.cfg.GetMasterNetworkParameters()
1363

    
1364
    # Run post hooks on master node before it's removed
1365
    _RunPostHook(self, master_params.name)
1366

    
1367
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1368
                                                     master_params)
1369
    result.Raise("Could not disable the master role")
1370

    
1371
    return master_params.name
1372

    
1373

    
1374
def _VerifyCertificate(filename):
1375
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1376

1377
  @type filename: string
1378
  @param filename: Path to PEM file
1379

1380
  """
1381
  try:
1382
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1383
                                           utils.ReadFile(filename))
1384
  except Exception, err: # pylint: disable=W0703
1385
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1386
            "Failed to load X509 certificate %s: %s" % (filename, err))
1387

    
1388
  (errcode, msg) = \
1389
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1390
                                constants.SSL_CERT_EXPIRATION_ERROR)
1391

    
1392
  if msg:
1393
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1394
  else:
1395
    fnamemsg = None
1396

    
1397
  if errcode is None:
1398
    return (None, fnamemsg)
1399
  elif errcode == utils.CERT_WARNING:
1400
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1401
  elif errcode == utils.CERT_ERROR:
1402
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1403

    
1404
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1405

    
1406

    
1407
def _GetAllHypervisorParameters(cluster, instances):
1408
  """Compute the set of all hypervisor parameters.
1409

1410
  @type cluster: L{objects.Cluster}
1411
  @param cluster: the cluster object
1412
  @type instances: list of L{objects.Instance}
1413
  @param instances: additional instances from which to obtain parameters
1414
  @rtype: list of (origin, hypervisor, parameters)
1415
  @return: a list with all parameters found, indicating the hypervisor they
1416
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1417

1418
  """
1419
  hvp_data = []
1420

    
1421
  for hv_name in cluster.enabled_hypervisors:
1422
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1423

    
1424
  for os_name, os_hvp in cluster.os_hvp.items():
1425
    for hv_name, hv_params in os_hvp.items():
1426
      if hv_params:
1427
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1428
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1429

    
1430
  # TODO: collapse identical parameter values in a single one
1431
  for instance in instances:
1432
    if instance.hvparams:
1433
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1434
                       cluster.FillHV(instance)))
1435

    
1436
  return hvp_data
1437

    
1438

    
1439
class _VerifyErrors(object):
1440
  """Mix-in for cluster/group verify LUs.
1441

1442
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1443
  self.op and self._feedback_fn to be available.)
1444

1445
  """
1446

    
1447
  ETYPE_FIELD = "code"
1448
  ETYPE_ERROR = "ERROR"
1449
  ETYPE_WARNING = "WARNING"
1450

    
1451
  def _Error(self, ecode, item, msg, *args, **kwargs):
1452
    """Format an error message.
1453

1454
    Based on the opcode's error_codes parameter, either format a
1455
    parseable error code, or a simpler error string.
1456

1457
    This must be called only from Exec and functions called from Exec.
1458

1459
    """
1460
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1461
    itype, etxt, _ = ecode
1462
    # first complete the msg
1463
    if args:
1464
      msg = msg % args
1465
    # then format the whole message
1466
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1467
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1468
    else:
1469
      if item:
1470
        item = " " + item
1471
      else:
1472
        item = ""
1473
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1474
    # and finally report it via the feedback_fn
1475
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1476

    
1477
  def _ErrorIf(self, cond, ecode, *args, **kwargs):
1478
    """Log an error message if the passed condition is True.
1479

1480
    """
1481
    cond = (bool(cond)
1482
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1483

    
1484
    # If the error code is in the list of ignored errors, demote the error to a
1485
    # warning
1486
    (_, etxt, _) = ecode
1487
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1488
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1489

    
1490
    if cond:
1491
      self._Error(ecode, *args, **kwargs)
1492

    
1493
    # only mark the operation as failed for ERROR-level results, not for WARN
1494
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1495
      self.bad = self.bad or cond
1496

    
1497

    
1498
class LUClusterVerify(NoHooksLU):
1499
  """Submits all jobs necessary to verify the cluster.
1500

1501
  """
1502
  REQ_BGL = False
1503

    
1504
  def ExpandNames(self):
1505
    self.needed_locks = {}
1506

    
1507
  def Exec(self, feedback_fn):
1508
    jobs = []
1509

    
1510
    if self.op.group_name:
1511
      groups = [self.op.group_name]
1512
      depends_fn = lambda: None
1513
    else:
1514
      groups = self.cfg.GetNodeGroupList()
1515

    
1516
      # Verify global configuration
1517
      jobs.append([
1518
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1519
        ])
1520

    
1521
      # Always depend on global verification
1522
      depends_fn = lambda: [(-len(jobs), [])]
1523

    
1524
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1525
                                            ignore_errors=self.op.ignore_errors,
1526
                                            depends=depends_fn())]
1527
                for group in groups)
1528

    
1529
    # Fix up all parameters
1530
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1531
      op.debug_simulate_errors = self.op.debug_simulate_errors
1532
      op.verbose = self.op.verbose
1533
      op.error_codes = self.op.error_codes
1534
      try:
1535
        op.skip_checks = self.op.skip_checks
1536
      except AttributeError:
1537
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1538

    
1539
    return ResultWithJobs(jobs)
1540

    
1541

    
1542
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
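    # hvp_data is a sequence of (source description, hypervisor name,
    # parameter dict) tuples, e.g. (hypothetical values):
    #   ("cluster", constants.HT_XEN_PVM, {"kernel_path": "/boot/vmlinuz-xenU"})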

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node,
         utils.CommaJoin(dangling_instances.get(node,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad
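    # Example of what the two checks above report (hypothetical names): a node
    # "node3.example.com" whose group UUID is no longer in the configuration
    # and which hosts instance "web1" is listed as "node3.example.com (web1)";
    # an instance whose primary node is missing from the configuration
    # entirely shows up in the second message instead.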


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1633
  """Verifies the status of a node group.
1634

1635
  """
1636
  HPATH = "cluster-verify"
1637
  HTYPE = constants.HTYPE_CLUSTER
1638
  REQ_BGL = False
1639

    
1640
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1641

    
1642
  class NodeImage(object):
1643
    """A class representing the logical and physical status of a node.
1644

1645
    @type name: string
1646
    @ivar name: the node name to which this object refers
1647
    @ivar volumes: a structure as returned from
1648
        L{ganeti.backend.GetVolumeList} (runtime)
1649
    @ivar instances: a list of running instances (runtime)
1650
    @ivar pinst: list of configured primary instances (config)
1651
    @ivar sinst: list of configured secondary instances (config)
1652
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1653
        instances for which this node is secondary (config)
1654
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1655
    @ivar dfree: free disk, as reported by the node (runtime)
1656
    @ivar offline: the offline status (config)
1657
    @type rpc_fail: boolean
1658
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1659
        not whether the individual keys were correct) (runtime)
1660
    @type lvm_fail: boolean
1661
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1662
    @type hyp_fail: boolean
1663
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1664
    @type ghost: boolean
1665
    @ivar ghost: whether this is a known node or not (config)
1666
    @type os_fail: boolean
1667
    @ivar os_fail: whether the RPC call didn't return valid OS data
1668
    @type oslist: list
1669
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1670
    @type vm_capable: boolean
1671
    @ivar vm_capable: whether the node can host instances
1672

1673
    """
1674
    def __init__(self, offline=False, name=None, vm_capable=True):
1675
      self.name = name
1676
      self.volumes = {}
1677
      self.instances = []
1678
      self.pinst = []
1679
      self.sinst = []
1680
      self.sbp = {}
1681
      self.mfree = 0
1682
      self.dfree = 0
1683
      self.offline = offline
1684
      self.vm_capable = vm_capable
1685
      self.rpc_fail = False
1686
      self.lvm_fail = False
1687
      self.hyp_fail = False
1688
      self.ghost = False
1689
      self.os_fail = False
1690
      self.oslist = {}
1691

    
1692
  def ExpandNames(self):
1693
    # This raises errors.OpPrereqError on its own:
1694
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1695

    
1696
    # Get instances in node group; this is unsafe and needs verification later
1697
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1698

    
1699
    self.needed_locks = {
1700
      locking.LEVEL_INSTANCE: inst_names,
1701
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1702
      locking.LEVEL_NODE: [],
1703
      }
1704

    
1705
    self.share_locks = _ShareAll()
1706

    
1707
  def DeclareLocks(self, level):
1708
    if level == locking.LEVEL_NODE:
1709
      # Get members of node group; this is unsafe and needs verification later
1710
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1711

    
1712
      all_inst_info = self.cfg.GetAllInstancesInfo()
1713

    
1714
      # In Exec(), we warn about mirrored instances that have primary and
1715
      # secondary living in separate node groups. To fully verify that
1716
      # volumes for these instances are healthy, we will need to do an
1717
      # extra call to their secondaries. We ensure here those nodes will
1718
      # be locked.
1719
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1720
        # Important: access only the instances whose lock is owned
1721
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1722
          nodes.update(all_inst_info[inst].secondary_nodes)
1723

    
1724
      self.needed_locks[locking.LEVEL_NODE] = nodes
1725

    
1726
  def CheckPrereq(self):
1727
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1728
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1729

    
1730
    group_nodes = set(self.group_info.members)
1731
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1732

    
1733
    unlocked_nodes = \
1734
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1735

    
1736
    unlocked_instances = \
1737
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1738

    
1739
    if unlocked_nodes:
1740
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1741
                                 utils.CommaJoin(unlocked_nodes))
1742

    
1743
    if unlocked_instances:
1744
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1745
                                 utils.CommaJoin(unlocked_instances))
1746

    
1747
    self.all_node_info = self.cfg.GetAllNodesInfo()
1748
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1749

    
1750
    self.my_node_names = utils.NiceSort(group_nodes)
1751
    self.my_inst_names = utils.NiceSort(group_instances)
1752

    
1753
    self.my_node_info = dict((name, self.all_node_info[name])
1754
                             for name in self.my_node_names)
1755

    
1756
    self.my_inst_info = dict((name, self.all_inst_info[name])
1757
                             for name in self.my_inst_names)
1758

    
1759
    # We detect here the nodes that will need the extra RPC calls for verifying
1760
    # split LV volumes; they should be locked.
1761
    extra_lv_nodes = set()
1762

    
1763
    for inst in self.my_inst_info.values():
1764
      if inst.disk_template in constants.DTS_INT_MIRROR:
1765
        group = self.my_node_info[inst.primary_node].group
1766
        for nname in inst.secondary_nodes:
1767
          if self.all_node_info[nname].group != group:
1768
            extra_lv_nodes.add(nname)
1769

    
1770
    unlocked_lv_nodes = \
1771
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1772

    
1773
    if unlocked_lv_nodes:
1774
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1775
                                 utils.CommaJoin(unlocked_lv_nodes))
1776
    self.extra_lv_nodes = list(extra_lv_nodes)
1777

    
1778
  def _VerifyNode(self, ninfo, nresult):
1779
    """Perform some basic validation on data returned from a node.
1780

1781
      - check the result data structure is well formed and has all the
1782
        mandatory fields
1783
      - check ganeti version
1784

1785
    @type ninfo: L{objects.Node}
1786
    @param ninfo: the node to check
1787
    @param nresult: the results from the node
1788
    @rtype: boolean
1789
    @return: whether overall this call was successful (and we can expect
1790
         reasonable values in the response)
1791

1792
    """
1793
    node = ninfo.name
1794
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1795

    
1796
    # main result, nresult should be a non-empty dict
1797
    test = not nresult or not isinstance(nresult, dict)
1798
    _ErrorIf(test, constants.CV_ENODERPC, node,
1799
                  "unable to verify node: no data returned")
1800
    if test:
1801
      return False
1802

    
1803
    # compares ganeti version
1804
    local_version = constants.PROTOCOL_VERSION
1805
    remote_version = nresult.get("version", None)
1806
    test = not (remote_version and
1807
                isinstance(remote_version, (list, tuple)) and
1808
                len(remote_version) == 2)
1809
    _ErrorIf(test, constants.CV_ENODERPC, node,
1810
             "connection to node returned invalid data")
1811
    if test:
1812
      return False
1813

    
1814
    test = local_version != remote_version[0]
1815
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
1816
             "incompatible protocol versions: master %s,"
1817
             " node %s", local_version, remote_version[0])
1818
    if test:
1819
      return False
1820

    
1821
    # node seems compatible, we can actually try to look into its results
1822

    
1823
    # full package version
1824
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1825
                  constants.CV_ENODEVERSION, node,
1826
                  "software version mismatch: master %s, node %s",
1827
                  constants.RELEASE_VERSION, remote_version[1],
1828
                  code=self.ETYPE_WARNING)
1829

    
1830
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1831
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1832
      for hv_name, hv_result in hyp_result.iteritems():
1833
        test = hv_result is not None
1834
        _ErrorIf(test, constants.CV_ENODEHV, node,
1835
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1836

    
1837
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1838
    if ninfo.vm_capable and isinstance(hvp_result, list):
1839
      for item, hv_name, hv_result in hvp_result:
1840
        _ErrorIf(True, constants.CV_ENODEHV, node,
1841
                 "hypervisor %s parameter verify failure (source %s): %s",
1842
                 hv_name, item, hv_result)
1843

    
1844
    test = nresult.get(constants.NV_NODESETUP,
1845
                       ["Missing NODESETUP results"])
1846
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1847
             "; ".join(test))
1848

    
1849
    return True
1850

    
1851
  def _VerifyNodeTime(self, ninfo, nresult,
1852
                      nvinfo_starttime, nvinfo_endtime):
1853
    """Check the node time.
1854

1855
    @type ninfo: L{objects.Node}
1856
    @param ninfo: the node to check
1857
    @param nresult: the remote results for the node
1858
    @param nvinfo_starttime: the start time of the RPC call
1859
    @param nvinfo_endtime: the end time of the RPC call
1860

1861
    """
1862
    node = ninfo.name
1863
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1864

    
1865
    ntime = nresult.get(constants.NV_TIME, None)
1866
    try:
1867
      ntime_merged = utils.MergeTime(ntime)
1868
    except (ValueError, TypeError):
1869
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1870
      return
1871

    
1872
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1873
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1874
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1875
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1876
    else:
1877
      ntime_diff = None
1878

    
1879
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1880
             "Node time diverges by at least %s from master node time",
1881
             ntime_diff)
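    # Illustrative numbers (hypothetical): with an allowed skew of 150 seconds
    # and an RPC window of 10:00:00-10:00:02, a node reporting 10:03:00 lies
    # beyond the upper bound and is flagged as diverging by about 178s.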
1882

    
1883
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1884
    """Check the node LVM results.
1885

1886
    @type ninfo: L{objects.Node}
1887
    @param ninfo: the node to check
1888
    @param nresult: the remote results for the node
1889
    @param vg_name: the configured VG name
1890

1891
    """
1892
    if vg_name is None:
1893
      return
1894

    
1895
    node = ninfo.name
1896
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1897

    
1898
    # checks vg existence and size > 20G
1899
    vglist = nresult.get(constants.NV_VGLIST, None)
1900
    test = not vglist
1901
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1902
    if not test:
1903
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1904
                                            constants.MIN_VG_SIZE)
1905
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1906

    
1907
    # check pv names
1908
    pvlist = nresult.get(constants.NV_PVLIST, None)
1909
    test = pvlist is None
1910
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1911
    if not test:
1912
      # check that ':' is not present in PV names, since it's a
1913
      # special character for lvcreate (denotes the range of PEs to
1914
      # use on the PV)
1915
      for _, pvname, owner_vg in pvlist:
1916
        test = ":" in pvname
1917
        _ErrorIf(test, constants.CV_ENODELVM, node,
1918
                 "Invalid character ':' in PV '%s' of VG '%s'",
1919
                 pvname, owner_vg)
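      # For example (hypothetical name), a PV reported as "/dev/sda3:1000"
      # would be flagged here, because lvcreate would interpret the ":1000"
      # suffix as a physical-extent range rather than as part of the name.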
1920

    
1921
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1922
    """Check the node bridges.
1923

1924
    @type ninfo: L{objects.Node}
1925
    @param ninfo: the node to check
1926
    @param nresult: the remote results for the node
1927
    @param bridges: the expected list of bridges
1928

1929
    """
1930
    if not bridges:
1931
      return
1932

    
1933
    node = ninfo.name
1934
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1935

    
1936
    missing = nresult.get(constants.NV_BRIDGES, None)
1937
    test = not isinstance(missing, list)
1938
    _ErrorIf(test, constants.CV_ENODENET, node,
1939
             "did not return valid bridge information")
1940
    if not test:
1941
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1942
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1943

    
1944
  def _VerifyNodeNetwork(self, ninfo, nresult):
1945
    """Check the node network connectivity results.
1946

1947
    @type ninfo: L{objects.Node}
1948
    @param ninfo: the node to check
1949
    @param nresult: the remote results for the node
1950

1951
    """
1952
    node = ninfo.name
1953
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1954

    
1955
    test = constants.NV_NODELIST not in nresult
1956
    _ErrorIf(test, constants.CV_ENODESSH, node,
1957
             "node hasn't returned node ssh connectivity data")
1958
    if not test:
1959
      if nresult[constants.NV_NODELIST]:
1960
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1961
          _ErrorIf(True, constants.CV_ENODESSH, node,
1962
                   "ssh communication with node '%s': %s", a_node, a_msg)
1963

    
1964
    test = constants.NV_NODENETTEST not in nresult
1965
    _ErrorIf(test, constants.CV_ENODENET, node,
1966
             "node hasn't returned node tcp connectivity data")
1967
    if not test:
1968
      if nresult[constants.NV_NODENETTEST]:
1969
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1970
        for anode in nlist:
1971
          _ErrorIf(True, constants.CV_ENODENET, node,
1972
                   "tcp communication with node '%s': %s",
1973
                   anode, nresult[constants.NV_NODENETTEST][anode])
1974

    
1975
    test = constants.NV_MASTERIP not in nresult
1976
    _ErrorIf(test, constants.CV_ENODENET, node,
1977
             "node hasn't returned node master IP reachability data")
1978
    if not test:
1979
      if not nresult[constants.NV_MASTERIP]:
1980
        if node == self.master_node:
1981
          msg = "the master node cannot reach the master IP (not configured?)"
1982
        else:
1983
          msg = "cannot reach the master IP"
1984
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
1985

    
1986
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1987
                      diskstatus):
1988
    """Verify an instance.
1989

1990
    This function checks to see if the required block devices are
1991
    available on the instance's node.
1992

1993
    """
1994
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1995
    node_current = instanceconfig.primary_node
1996

    
1997
    node_vol_should = {}
1998
    instanceconfig.MapLVsByNode(node_vol_should)
1999

    
2000
    for node in node_vol_should:
2001
      n_img = node_image[node]
2002
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2003
        # ignore missing volumes on offline or broken nodes
2004
        continue
2005
      for volume in node_vol_should[node]:
2006
        test = volume not in n_img.volumes
2007
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2008
                 "volume %s missing on node %s", volume, node)
2009

    
2010
    if instanceconfig.admin_up:
2011
      pri_img = node_image[node_current]
2012
      test = instance not in pri_img.instances and not pri_img.offline
2013
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2014
               "instance not running on its primary node %s",
2015
               node_current)
2016

    
2017
    diskdata = [(nname, success, status, idx)
2018
                for (nname, disks) in diskstatus.items()
2019
                for idx, (success, status) in enumerate(disks)]
2020

    
2021
    for nname, success, bdev_status, idx in diskdata:
2022
      # the 'ghost node' construction in Exec() ensures that we have a
2023
      # node here
2024
      snode = node_image[nname]
2025
      bad_snode = snode.ghost or snode.offline
2026
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2027
               constants.CV_EINSTANCEFAULTYDISK, instance,
2028
               "couldn't retrieve status for disk/%s on %s: %s",
2029
               idx, nname, bdev_status)
2030
      _ErrorIf((instanceconfig.admin_up and success and
2031
                bdev_status.ldisk_status == constants.LDS_FAULTY),
2032
               constants.CV_EINSTANCEFAULTYDISK, instance,
2033
               "disk/%s on %s is faulty", idx, nname)
2034

    
2035
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2036
    """Verify if there are any unknown volumes in the cluster.
2037

2038
    The .os, .swap and backup volumes are ignored. All other volumes are
2039
    reported as unknown.
2040

2041
    @type reserved: L{ganeti.utils.FieldSet}
2042
    @param reserved: a FieldSet of reserved volume names
2043

2044
    """
2045
    for node, n_img in node_image.items():
2046
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2047
        # skip non-healthy nodes
2048
        continue
2049
      for volume in n_img.volumes:
2050
        test = ((node not in node_vol_should or
2051
                volume not in node_vol_should[node]) and
2052
                not reserved.Matches(volume))
2053
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2054
                      "volume %s is unknown", volume)
2055

    
2056
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2057
    """Verify N+1 Memory Resilience.
2058

2059
    Check that if one single node dies we can still start all the
2060
    instances it was primary for.
2061

2062
    """
2063
    cluster_info = self.cfg.GetClusterInfo()
2064
    for node, n_img in node_image.items():
2065
      # This code checks that every node which is now listed as
2066
      # secondary has enough memory to host all instances it is
2067
      # supposed to should a single other node in the cluster fail.
2068
      # FIXME: not ready for failover to an arbitrary node
2069
      # FIXME: does not support file-backed instances
2070
      # WARNING: we currently take into account down instances as well
2071
      # as up ones, considering that even if they're down someone
2072
      # might want to start them even in the event of a node failure.
2073
      if n_img.offline:
2074
        # we're skipping offline nodes from the N+1 warning, since
2075
        # most likely we don't have good memory information from them;
2076
        # we already list instances living on such nodes, and that's
2077
        # enough warning
2078
        continue
2079
      for prinode, instances in n_img.sbp.items():
2080
        needed_mem = 0
2081
        for instance in instances:
2082
          bep = cluster_info.FillBE(instance_cfg[instance])
2083
          if bep[constants.BE_AUTO_BALANCE]:
2084
            needed_mem += bep[constants.BE_MEMORY]
2085
        test = n_img.mfree < needed_mem
2086
        self._ErrorIf(test, constants.CV_ENODEN1, node,
2087
                      "not enough memory to accomodate instance failovers"
2088
                      " should node %s fail (%dMiB needed, %dMiB available)",
2089
                      prinode, needed_mem, n_img.mfree)
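    # Worked example (hypothetical): if node A is secondary for instances of
    # primary node B needing 2048 and 1024 MiB with auto_balance enabled, A
    # must have at least 3072 MiB free; reporting only 2000 MiB free triggers
    # the CV_ENODEN1 error above.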
2090

    
2091
  @classmethod
2092
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2093
                   (files_all, files_opt, files_mc, files_vm)):
2094
    """Verifies file checksums collected from all nodes.
2095

2096
    @param errorif: Callback for reporting errors
2097
    @param nodeinfo: List of L{objects.Node} objects
2098
    @param master_node: Name of master node
2099
    @param all_nvinfo: RPC results
2100

2101
    """
2102
    # Define functions determining which nodes to consider for a file
2103
    files2nodefn = [
2104
      (files_all, None),
2105
      (files_mc, lambda node: (node.master_candidate or
2106
                               node.name == master_node)),
2107
      (files_vm, lambda node: node.vm_capable),
2108
      ]
2109

    
2110
    # Build mapping from filename to list of nodes which should have the file
2111
    nodefiles = {}
2112
    for (files, fn) in files2nodefn:
2113
      if fn is None:
2114
        filenodes = nodeinfo
2115
      else:
2116
        filenodes = filter(fn, nodeinfo)
2117
      nodefiles.update((filename,
2118
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2119
                       for filename in files)
2120

    
2121
    assert set(nodefiles) == (files_all | files_mc | files_vm)
2122

    
2123
    fileinfo = dict((filename, {}) for filename in nodefiles)
2124
    ignore_nodes = set()
2125

    
2126
    for node in nodeinfo:
2127
      if node.offline:
2128
        ignore_nodes.add(node.name)
2129
        continue
2130

    
2131
      nresult = all_nvinfo[node.name]
2132

    
2133
      if nresult.fail_msg or not nresult.payload:
2134
        node_files = None
2135
      else:
2136
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2137

    
2138
      test = not (node_files and isinstance(node_files, dict))
2139
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
2140
              "Node did not return file checksum data")
2141
      if test:
2142
        ignore_nodes.add(node.name)
2143
        continue
2144

    
2145
      # Build per-checksum mapping from filename to nodes having it
2146
      for (filename, checksum) in node_files.items():
2147
        assert filename in nodefiles
2148
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2149

    
2150
    for (filename, checksums) in fileinfo.items():
2151
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2152

    
2153
      # Nodes having the file
2154
      with_file = frozenset(node_name
2155
                            for nodes in fileinfo[filename].values()
2156
                            for node_name in nodes) - ignore_nodes
2157

    
2158
      expected_nodes = nodefiles[filename] - ignore_nodes
2159

    
2160
      # Nodes missing file
2161
      missing_file = expected_nodes - with_file
2162

    
2163
      if filename in files_opt:
2164
        # All or no nodes
2165
        errorif(missing_file and missing_file != expected_nodes,
2166
                constants.CV_ECLUSTERFILECHECK, None,
2167
                "File %s is optional, but it must exist on all or no"
2168
                " nodes (not found on %s)",
2169
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2170
      else:
2171
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2172
                "File %s is missing from node(s) %s", filename,
2173
                utils.CommaJoin(utils.NiceSort(missing_file)))
2174

    
2175
        # Warn if a node has a file it shouldn't
2176
        unexpected = with_file - expected_nodes
2177
        errorif(unexpected,
2178
                constants.CV_ECLUSTERFILECHECK, None,
2179
                "File %s should not exist on node(s) %s",
2180
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2181

    
2182
      # See if there are multiple versions of the file
2183
      test = len(checksums) > 1
2184
      if test:
2185
        variants = ["variant %s on %s" %
2186
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2187
                    for (idx, (checksum, nodes)) in
2188
                      enumerate(sorted(checksums.items()))]
2189
      else:
2190
        variants = []
2191

    
2192
      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2193
              "File %s found with %s different checksums (%s)",
2194
              filename, len(checksums), "; ".join(variants))
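    # Summary of the policy applied above (file names are hypothetical): a
    # file in files_opt may be missing from every candidate node, but not from
    # only some of them; every other tracked file must exist, with one and the
    # same checksum, on each non-offline node expected to carry it.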
2195

    
2196
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2197
                      drbd_map):
2198
    """Verifies and the node DRBD status.
2199

2200
    @type ninfo: L{objects.Node}
2201
    @param ninfo: the node to check
2202
    @param nresult: the remote results for the node
2203
    @param instanceinfo: the dict of instances
2204
    @param drbd_helper: the configured DRBD usermode helper
2205
    @param drbd_map: the DRBD map as returned by
2206
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2207

2208
    """
2209
    node = ninfo.name
2210
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2211

    
2212
    if drbd_helper:
2213
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2214
      test = (helper_result == None)
2215
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2216
               "no drbd usermode helper returned")
2217
      if helper_result:
2218
        status, payload = helper_result
2219
        test = not status
2220
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2221
                 "drbd usermode helper check unsuccessful: %s", payload)
2222
        test = status and (payload != drbd_helper)
2223
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2224
                 "wrong drbd usermode helper: %s", payload)
2225

    
2226
    # compute the DRBD minors
2227
    node_drbd = {}
2228
    for minor, instance in drbd_map[node].items():
2229
      test = instance not in instanceinfo
2230
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2231
               "ghost instance '%s' in temporary DRBD map", instance)
2232
        # ghost instance should not be running, but otherwise we
2233
        # don't give double warnings (both ghost instance and
2234
        # unallocated minor in use)
2235
      if test:
2236
        node_drbd[minor] = (instance, False)
2237
      else:
2238
        instance = instanceinfo[instance]
2239
        node_drbd[minor] = (instance.name, instance.admin_up)
2240

    
2241
    # and now check them
2242
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2243
    test = not isinstance(used_minors, (tuple, list))
2244
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
2245
             "cannot parse drbd status file: %s", str(used_minors))
2246
    if test:
2247
      # we cannot check drbd status
2248
      return
2249

    
2250
    for minor, (iname, must_exist) in node_drbd.items():
2251
      test = minor not in used_minors and must_exist
2252
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2253
               "drbd minor %d of instance %s is not active", minor, iname)
2254
    for minor in used_minors:
2255
      test = minor not in node_drbd
2256
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2257
               "unallocated drbd minor %d is in use", minor)
2258

    
2259
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2260
    """Builds the node OS structures.
2261

2262
    @type ninfo: L{objects.Node}
2263
    @param ninfo: the node to check
2264
    @param nresult: the remote results for the node
2265
    @param nimg: the node image object
2266

2267
    """
2268
    node = ninfo.name
2269
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2270

    
2271
    remote_os = nresult.get(constants.NV_OSLIST, None)
2272
    test = (not isinstance(remote_os, list) or
2273
            not compat.all(isinstance(v, list) and len(v) == 7
2274
                           for v in remote_os))
2275

    
2276
    _ErrorIf(test, constants.CV_ENODEOS, node,
2277
             "node hasn't returned valid OS data")
2278

    
2279
    nimg.os_fail = test
2280

    
2281
    if test:
2282
      return
2283

    
2284
    os_dict = {}
2285

    
2286
    for (name, os_path, status, diagnose,
2287
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2288

    
2289
      if name not in os_dict:
2290
        os_dict[name] = []
2291

    
2292
      # parameters is a list of lists instead of list of tuples due to
2293
      # JSON lacking a real tuple type, fix it:
2294
      parameters = [tuple(v) for v in parameters]
2295
      os_dict[name].append((os_path, status, diagnose,
2296
                            set(variants), set(parameters), set(api_ver)))
2297

    
2298
    nimg.oslist = os_dict
2299

    
2300
  def _VerifyNodeOS(self, ninfo, nimg, base):
2301
    """Verifies the node OS list.
2302

2303
    @type ninfo: L{objects.Node}
2304
    @param ninfo: the node to check
2305
    @param nimg: the node image object
2306
    @param base: the 'template' node we match against (e.g. from the master)
2307

2308
    """
2309
    node = ninfo.name
2310
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2311

    
2312
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2313

    
2314
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2315
    for os_name, os_data in nimg.oslist.items():
2316
      assert os_data, "Empty OS status for OS %s?!" % os_name
2317
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2318
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2319
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2320
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2321
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2322
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2323
      # comparisons with the 'base' image
2324
      test = os_name not in base.oslist
2325
      _ErrorIf(test, constants.CV_ENODEOS, node,
2326
               "Extra OS %s not present on reference node (%s)",
2327
               os_name, base.name)
2328
      if test:
2329
        continue
2330
      assert base.oslist[os_name], "Base node has empty OS status?"
2331
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2332
      if not b_status:
2333
        # base OS is invalid, skipping
2334
        continue
2335
      for kind, a, b in [("API version", f_api, b_api),
2336
                         ("variants list", f_var, b_var),
2337
                         ("parameters", beautify_params(f_param),
2338
                          beautify_params(b_param))]:
2339
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
2340
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2341
                 kind, os_name, base.name,
2342
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2343

    
2344
    # check any missing OSes
2345
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2346
    _ErrorIf(missing, constants.CV_ENODEOS, node,
2347
             "OSes present on reference node %s but missing on this node: %s",
2348
             base.name, utils.CommaJoin(missing))
2349

    
2350
  def _VerifyOob(self, ninfo, nresult):
2351
    """Verifies out of band functionality of a node.
2352

2353
    @type ninfo: L{objects.Node}
2354
    @param ninfo: the node to check
2355
    @param nresult: the remote results for the node
2356

2357
    """
2358
    node = ninfo.name
2359
    # We just have to verify the paths on master and/or master candidates
2360
    # as the oob helper is invoked on the master
2361
    if ((ninfo.master_candidate or ninfo.master_capable) and
2362
        constants.NV_OOB_PATHS in nresult):
2363
      for path_result in nresult[constants.NV_OOB_PATHS]:
2364
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2365

    
2366
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2367
    """Verifies and updates the node volume data.
2368

2369
    This function will update a L{NodeImage}'s internal structures
2370
    with data from the remote call.
2371

2372
    @type ninfo: L{objects.Node}
2373
    @param ninfo: the node to check
2374
    @param nresult: the remote results for the node
2375
    @param nimg: the node image object
2376
    @param vg_name: the configured VG name
2377

2378
    """
2379
    node = ninfo.name
2380
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2381

    
2382
    nimg.lvm_fail = True
2383
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2384
    if vg_name is None:
2385
      pass
2386
    elif isinstance(lvdata, basestring):
2387
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2388
               utils.SafeEncode(lvdata))
2389
    elif not isinstance(lvdata, dict):
2390
      _ErrorIf(True, constants.CV_ENODELVM, node,
2391
               "rpc call to node failed (lvlist)")
2392
    else:
2393
      nimg.volumes = lvdata
2394
      nimg.lvm_fail = False
2395

    
2396
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2397
    """Verifies and updates the node instance list.
2398

2399
    If the listing was successful, then updates this node's instance
2400
    list. Otherwise, it marks the RPC call as failed for the instance
2401
    list key.
2402

2403
    @type ninfo: L{objects.Node}
2404
    @param ninfo: the node to check
2405
    @param nresult: the remote results for the node
2406
    @param nimg: the node image object
2407

2408
    """
2409
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2410
    test = not isinstance(idata, list)
2411
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2412
                  "rpc call to node failed (instancelist): %s",
2413
                  utils.SafeEncode(str(idata)))
2414
    if test:
2415
      nimg.hyp_fail = True
2416
    else:
2417
      nimg.instances = idata
2418

    
2419
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2420
    """Verifies and computes a node information map
2421

2422
    @type ninfo: L{objects.Node}
2423
    @param ninfo: the node to check
2424
    @param nresult: the remote results for the node
2425
    @param nimg: the node image object
2426
    @param vg_name: the configured VG name
2427

2428
    """
2429
    node = ninfo.name
2430
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2431

    
2432
    # try to read free memory (from the hypervisor)
2433
    hv_info = nresult.get(constants.NV_HVINFO, None)
2434
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2435
    _ErrorIf(test, constants.CV_ENODEHV, node,
2436
             "rpc call to node failed (hvinfo)")
2437
    if not test:
2438
      try:
2439
        nimg.mfree = int(hv_info["memory_free"])
2440
      except (ValueError, TypeError):
2441
        _ErrorIf(True, constants.CV_ENODERPC, node,
2442
                 "node returned invalid nodeinfo, check hypervisor")
2443

    
2444
    # FIXME: devise a free space model for file based instances as well
2445
    if vg_name is not None:
2446
      test = (constants.NV_VGLIST not in nresult or
2447
              vg_name not in nresult[constants.NV_VGLIST])
2448
      _ErrorIf(test, constants.CV_ENODELVM, node,
2449
               "node didn't return data for the volume group '%s'"
2450
               " - it is either missing or broken", vg_name)
2451
      if not test:
2452
        try:
2453
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2454
        except (ValueError, TypeError):
2455
          _ErrorIf(True, constants.CV_ENODERPC, node,
2456
                   "node returned invalid LVM info, check LVM status")
2457

    
2458
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2459
    """Gets per-disk status information for all instances.
2460

2461
    @type nodelist: list of strings
2462
    @param nodelist: Node names
2463
    @type node_image: dict of (name, L{objects.Node})
2464
    @param node_image: Node objects
2465
    @type instanceinfo: dict of (name, L{objects.Instance})
2466
    @param instanceinfo: Instance objects
2467
    @rtype: {instance: {node: [(success, payload)]}}
2468
    @return: a dictionary of per-instance dictionaries with nodes as
2469
        keys and disk information as values; the disk information is a
2470
        list of tuples (success, payload)
2471

2472
    """
2473
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2474

    
2475
    node_disks = {}
2476
    node_disks_devonly = {}
2477
    diskless_instances = set()
2478
    diskless = constants.DT_DISKLESS
2479

    
2480
    for nname in nodelist:
2481
      node_instances = list(itertools.chain(node_image[nname].pinst,
2482
                                            node_image[nname].sinst))
2483
      diskless_instances.update(inst for inst in node_instances
2484
                                if instanceinfo[inst].disk_template == diskless)
2485
      disks = [(inst, disk)
2486
               for inst in node_instances
2487
               for disk in instanceinfo[inst].disks]
2488

    
2489
      if not disks:
2490
        # No need to collect data
2491
        continue
2492

    
2493
      node_disks[nname] = disks
2494

    
2495
      # Creating copies as SetDiskID below will modify the objects and that can
2496
      # lead to incorrect data returned from nodes
2497
      devonly = [dev.Copy() for (_, dev) in disks]
2498

    
2499
      for dev in devonly:
2500
        self.cfg.SetDiskID(dev, nname)
2501

    
2502
      node_disks_devonly[nname] = devonly
2503

    
2504
    assert len(node_disks) == len(node_disks_devonly)
2505

    
2506
    # Collect data from all nodes with disks
2507
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2508
                                                          node_disks_devonly)
2509

    
2510
    assert len(result) == len(node_disks)
2511

    
2512
    instdisk = {}
2513

    
2514
    for (nname, nres) in result.items():
2515
      disks = node_disks[nname]
2516

    
2517
      if nres.offline:
2518
        # No data from this node
2519
        data = len(disks) * [(False, "node offline")]
2520
      else:
2521
        msg = nres.fail_msg
2522
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
2523
                 "while getting disk information: %s", msg)
2524
        if msg:
2525
          # No data from this node
2526
          data = len(disks) * [(False, msg)]
2527
        else:
2528
          data = []
2529
          for idx, i in enumerate(nres.payload):
2530
            if isinstance(i, (tuple, list)) and len(i) == 2:
2531
              data.append(i)
2532
            else:
2533
              logging.warning("Invalid result from node %s, entry %d: %s",
2534
                              nname, idx, i)
2535
              data.append((False, "Invalid result from the remote node"))
2536

    
2537
      for ((inst, _), status) in zip(disks, data):
2538
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2539

    
2540
    # Add empty entries for diskless instances.
2541
    for inst in diskless_instances:
2542
      assert inst not in instdisk
2543
      instdisk[inst] = {}
2544

    
2545
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2546
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2547
                      compat.all(isinstance(s, (tuple, list)) and
2548
                                 len(s) == 2 for s in statuses)
2549
                      for inst, nnames in instdisk.items()
2550
                      for nname, statuses in nnames.items())
2551
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2552

    
2553
    return instdisk
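    # Shape of the result, with hypothetical names: instdisk == {
    #   "inst1": {"node1": [(True, status_disk0), (True, status_disk1)]},
    #   "inst2": {},   # diskless instance
    # }
    # i.e. one (success, payload) pair per disk for every node that reported.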
2554

    
2555
  @staticmethod
2556
  def _SshNodeSelector(group_uuid, all_nodes):
2557
    """Create endless iterators for all potential SSH check hosts.
2558

2559
    """
2560
    nodes = [node for node in all_nodes
2561
             if (node.group != group_uuid and
2562
                 not node.offline)]
2563
    keyfunc = operator.attrgetter("group")
2564

    
2565
    return map(itertools.cycle,
2566
               [sorted(map(operator.attrgetter("name"), names))
2567
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2568
                                                  keyfunc)])
2569

    
2570
  @classmethod
2571
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2572
    """Choose which nodes should talk to which other nodes.
2573

2574
    We will make nodes contact all nodes in their group, and one node from
2575
    every other group.
2576

2577
    @warning: This algorithm has a known issue if one node group is much
2578
      smaller than others (e.g. just one node). In such a case all other
2579
      nodes will talk to the single node.
2580

2581
    """
2582
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2583
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2584

    
2585
    return (online_nodes,
2586
            dict((name, sorted([i.next() for i in sel]))
2587
                 for name in online_nodes))
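    # Illustrative selection (hypothetical groups): for a group {a1, a2} with
    # other groups {b1} and {c1, c2}, every online node of the group is told to
    # contact its own group's members plus one node from each other group, the
    # candidates of those groups being handed out round-robin by
    # _SshNodeSelector.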
2588

    
2589
  def BuildHooksEnv(self):
2590
    """Build hooks env.
2591

2592
    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.
2594

2595
    """
2596
    env = {
2597
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2598
      }
2599

    
2600
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2601
               for node in self.my_node_info.values())
2602

    
2603
    return env
2604

    
2605
  def BuildHooksNodes(self):
2606
    """Build hooks nodes.
2607

2608
    """
2609
    return ([], self.my_node_names)
2610

    
2611
  def Exec(self, feedback_fn):
2612
    """Verify integrity of the node group, performing various test on nodes.
2613

2614
    """
2615
    # This method has too many local variables. pylint: disable=R0914
2616
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2617

    
2618
    if not self.my_node_names:
2619
      # empty node group
2620
      feedback_fn("* Empty node group, skipping verification")
2621
      return True
2622

    
2623
    self.bad = False
2624
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2625
    verbose = self.op.verbose
2626
    self._feedback_fn = feedback_fn
2627

    
2628
    vg_name = self.cfg.GetVGName()
2629
    drbd_helper = self.cfg.GetDRBDHelper()
2630
    cluster = self.cfg.GetClusterInfo()
2631
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2632
    hypervisors = cluster.enabled_hypervisors
2633
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2634

    
2635
    i_non_redundant = [] # Non redundant instances
2636
    i_non_a_balanced = [] # Non auto-balanced instances
2637
    n_offline = 0 # Count of offline nodes
2638
    n_drained = 0 # Count of nodes being drained
2639
    node_vol_should = {}
2640

    
2641
    # FIXME: verify OS list
2642

    
2643
    # File verification
2644
    filemap = _ComputeAncillaryFiles(cluster, False)
2645

    
2646
    # do local checksums
2647
    master_node = self.master_node = self.cfg.GetMasterNode()
2648
    master_ip = self.cfg.GetMasterIP()
2649

    
2650
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2651

    
2652
    node_verify_param = {
2653
      constants.NV_FILELIST:
2654
        utils.UniqueSequence(filename
2655
                             for files in filemap
2656
                             for filename in files),
2657
      constants.NV_NODELIST:
2658
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2659
                                  self.all_node_info.values()),
2660
      constants.NV_HYPERVISOR: hypervisors,
2661
      constants.NV_HVPARAMS:
2662
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2663
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2664
                                 for node in node_data_list
2665
                                 if not node.offline],
2666
      constants.NV_INSTANCELIST: hypervisors,
2667
      constants.NV_VERSION: None,
2668
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2669
      constants.NV_NODESETUP: None,
2670
      constants.NV_TIME: None,
2671
      constants.NV_MASTERIP: (master_node, master_ip),
2672
      constants.NV_OSLIST: None,
2673
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2674
      }
2675

    
2676
    if vg_name is not None:
2677
      node_verify_param[constants.NV_VGLIST] = None
2678
      node_verify_param[constants.NV_LVLIST] = vg_name
2679
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2680
      node_verify_param[constants.NV_DRBDLIST] = None
2681

    
2682
    if drbd_helper:
2683
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2684

    
2685
    # bridge checks
2686
    # FIXME: this needs to be changed per node-group, not cluster-wide
2687
    bridges = set()
2688
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2689
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2690
      bridges.add(default_nicpp[constants.NIC_LINK])
2691
    for instance in self.my_inst_info.values():
2692
      for nic in instance.nics:
2693
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2694
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2695
          bridges.add(full_nic[constants.NIC_LINK])
2696

    
2697
    if bridges:
2698
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2699

    
2700
    # Build our expected cluster state
2701
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2702
                                                 name=node.name,
2703
                                                 vm_capable=node.vm_capable))
2704
                      for node in node_data_list)
2705

    
2706
    # Gather OOB paths
2707
    oob_paths = []
2708
    for node in self.all_node_info.values():
2709
      path = _SupportsOob(self.cfg, node)
2710
      if path and path not in oob_paths:
2711
        oob_paths.append(path)
2712

    
2713
    if oob_paths:
2714
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2715

    
2716
    for instance in self.my_inst_names:
2717
      inst_config = self.my_inst_info[instance]
2718

    
2719
      for nname in inst_config.all_nodes:
2720
        if nname not in node_image:
2721
          gnode = self.NodeImage(name=nname)
2722
          gnode.ghost = (nname not in self.all_node_info)
2723
          node_image[nname] = gnode
2724

    
2725
      inst_config.MapLVsByNode(node_vol_should)
2726

    
2727
      pnode = inst_config.primary_node
2728
      node_image[pnode].pinst.append(instance)
2729

    
2730
      for snode in inst_config.secondary_nodes:
2731
        nimg = node_image[snode]
2732
        nimg.sinst.append(instance)
2733
        if pnode not in nimg.sbp:
2734
          nimg.sbp[pnode] = []
2735
        nimg.sbp[pnode].append(instance)
2736

    
2737
    # At this point, we have the in-memory data structures complete,
2738
    # except for the runtime information, which we'll gather next
2739

    
2740
    # Due to the way our RPC system works, exact response times cannot be
2741
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2742
    # time before and after executing the request, we can at least have a time
2743
    # window.
2744
    nvinfo_starttime = time.time()
2745
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2746
                                           node_verify_param,
2747
                                           self.cfg.GetClusterName())
2748
    nvinfo_endtime = time.time()
2749

    
2750
    if self.extra_lv_nodes and vg_name is not None:
2751
      extra_lv_nvinfo = \
2752
          self.rpc.call_node_verify(self.extra_lv_nodes,
2753
                                    {constants.NV_LVLIST: vg_name},
2754
                                    self.cfg.GetClusterName())
2755
    else:
2756
      extra_lv_nvinfo = {}
2757

    
2758
    all_drbd_map = self.cfg.ComputeDRBDMap()
2759

    
2760
    feedback_fn("* Gathering disk information (%s nodes)" %
2761
                len(self.my_node_names))
2762
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2763
                                     self.my_inst_info)
2764

    
2765
    feedback_fn("* Verifying configuration file consistency")
2766

    
2767
    # If not all nodes are being checked, we need to make sure the master node
2768
    # and a non-checked vm_capable node are in the list.
2769
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2770
    if absent_nodes:
2771
      vf_nvinfo = all_nvinfo.copy()
2772
      vf_node_info = list(self.my_node_info.values())
2773
      additional_nodes = []
2774
      if master_node not in self.my_node_info:
2775
        additional_nodes.append(master_node)
2776
        vf_node_info.append(self.all_node_info[master_node])
2777
      # Add the first vm_capable node we find which is not included
2778
      for node in absent_nodes:
2779
        nodeinfo = self.all_node_info[node]
2780
        if nodeinfo.vm_capable and not nodeinfo.offline:
2781
          additional_nodes.append(node)
2782
          vf_node_info.append(self.all_node_info[node])
2783
          break
2784
      key = constants.NV_FILELIST
2785
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2786
                                                 {key: node_verify_param[key]},
2787
                                                 self.cfg.GetClusterName()))
2788
    else:
2789
      vf_nvinfo = all_nvinfo
2790
      vf_node_info = self.my_node_info.values()
2791

    
2792
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2793

    
2794
    feedback_fn("* Verifying node status")
2795

    
2796
    refos_img = None
2797

    
2798
    for node_i in node_data_list:
2799
      node = node_i.name
2800
      nimg = node_image[node]
2801

    
2802
      if node_i.offline:
2803
        if verbose:
2804
          feedback_fn("* Skipping offline node %s" % (node,))
2805
        n_offline += 1
2806
        continue
2807

    
2808
      if node == master_node:
2809
        ntype = "master"
2810
      elif node_i.master_candidate:
2811
        ntype = "master candidate"
2812
      elif node_i.drained:
2813
        ntype = "drained"
2814
        n_drained += 1
2815
      else:
2816
        ntype = "regular"
2817
      if verbose:
2818
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2819

    
2820
      msg = all_nvinfo[node].fail_msg
2821
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2822
               msg)
2823
      if msg:
2824
        nimg.rpc_fail = True
2825
        continue
2826

    
2827
      nresult = all_nvinfo[node].payload
2828

    
2829
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2830
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2831
      self._VerifyNodeNetwork(node_i, nresult)
2832
      self._VerifyOob(node_i, nresult)
2833

    
2834
      if nimg.vm_capable:
2835
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2836
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2837
                             all_drbd_map)
2838

    
2839
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2840
        self._UpdateNodeInstances(node_i, nresult, nimg)
2841
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2842
        self._UpdateNodeOS(node_i, nresult, nimg)
2843

    
2844
        if not nimg.os_fail:
2845
          if refos_img is None:
2846
            refos_img = nimg
2847
          self._VerifyNodeOS(node_i, nimg, refos_img)
2848
        self._VerifyNodeBridges(node_i, nresult, bridges)
2849

    
2850
        # Check whether all running instances are primary for the node. (This
2851
        # can no longer be done from _VerifyInstance below, since some of the
2852
        # wrong instances could be from other node groups.)
2853
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2854

    
2855
        for inst in non_primary_inst:
2856
          test = inst in self.all_inst_info
2857
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2858
                   "instance should not run on node %s", node_i.name)
2859
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2860
                   "node is running unknown instance %s", inst)
2861

    
2862
    for node, result in extra_lv_nvinfo.items():
2863
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2864
                              node_image[node], vg_name)
2865

    
2866
    feedback_fn("* Verifying instance status")
2867
    for instance in self.my_inst_names:
2868
      if verbose:
2869
        feedback_fn("* Verifying instance %s" % instance)
2870
      inst_config = self.my_inst_info[instance]
2871
      self._VerifyInstance(instance, inst_config, node_image,
2872
                           instdisk[instance])
2873
      inst_nodes_offline = []
2874

    
2875
      pnode = inst_config.primary_node
2876
      pnode_img = node_image[pnode]
2877
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2878
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
2879
               " primary node failed", instance)
2880

    
2881
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2882
               constants.CV_EINSTANCEBADNODE, instance,
2883
               "instance is marked as running and lives on offline node %s",
2884
               inst_config.primary_node)
2885

    
2886
      # If the instance is non-redundant we cannot survive losing its primary
2887
      # node, so we are not N+1 compliant. On the other hand we have no disk
2888
      # templates with more than one secondary so that situation is not well
2889
      # supported either.
2890
      # FIXME: does not support file-backed instances
2891
      if not inst_config.secondary_nodes:
2892
        i_non_redundant.append(instance)
2893

    
2894
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
2895
               constants.CV_EINSTANCELAYOUT,
2896
               instance, "instance has multiple secondary nodes: %s",
2897
               utils.CommaJoin(inst_config.secondary_nodes),
2898
               code=self.ETYPE_WARNING)
2899

    
2900
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2901
        pnode = inst_config.primary_node
2902
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2903
        instance_groups = {}
2904

    
2905
        for node in instance_nodes:
2906
          instance_groups.setdefault(self.all_node_info[node].group,
2907
                                     []).append(node)
2908

    
2909
        pretty_list = [
2910
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2911
          # Sort so that we always list the primary node first.
2912
          for group, nodes in sorted(instance_groups.items(),
2913
                                     key=lambda (_, nodes): pnode in nodes,
2914
                                     reverse=True)]
2915

    
2916
        self._ErrorIf(len(instance_groups) > 1,
2917
                      constants.CV_EINSTANCESPLITGROUPS,
2918
                      instance, "instance has primary and secondary nodes in"
2919
                      " different groups: %s", utils.CommaJoin(pretty_list),
2920
                      code=self.ETYPE_WARNING)
2921

    
2922
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2923
        i_non_a_balanced.append(instance)
2924

    
2925
      for snode in inst_config.secondary_nodes:
2926
        s_img = node_image[snode]
2927
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2928
                 snode, "instance %s, connection to secondary node failed",
2929
                 instance)
2930

    
2931
        if s_img.offline:
2932
          inst_nodes_offline.append(snode)
2933

    
2934
      # warn that the instance lives on offline nodes
2935
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2936
               "instance has offline secondary node(s) %s",
2937
               utils.CommaJoin(inst_nodes_offline))
2938
      # ... or ghost/non-vm_capable nodes
2939
      for node in inst_config.all_nodes:
2940
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2941
                 instance, "instance lives on ghost node %s", node)
2942
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2943
                 instance, "instance lives on non-vm_capable node %s", node)
2944

    
2945
    feedback_fn("* Verifying orphan volumes")
2946
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2947

    
2948
    # We will get spurious "unknown volume" warnings if any node of this group
2949
    # is secondary for an instance whose primary is in another group. To avoid
2950
    # them, we find these instances and add their volumes to node_vol_should.
2951
    for inst in self.all_inst_info.values():
2952
      for secondary in inst.secondary_nodes:
2953
        if (secondary in self.my_node_info
2954
            and inst.name not in self.my_inst_info):
2955
          inst.MapLVsByNode(node_vol_should)
2956
          break
2957

    
2958
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2959

    
2960
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2961
      feedback_fn("* Verifying N+1 Memory redundancy")
2962
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2963

    
2964
    feedback_fn("* Other Notes")
2965
    if i_non_redundant:
2966
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2967
                  % len(i_non_redundant))
2968

    
2969
    if i_non_a_balanced:
2970
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2971
                  % len(i_non_a_balanced))
2972

    
2973
    if n_offline:
2974
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2975

    
2976
    if n_drained:
2977
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2978

    
2979
    return not self.bad
2980

    
2981
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2982
    """Analyze the post-hooks' result
2983

2984
    This method analyses the hook result, handles it, and sends some
2985
    nicely-formatted feedback back to the user.
2986

2987
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2988
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2989
    @param hooks_results: the results of the multi-node hooks rpc call
2990
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
                                        for inst in self.instances.values()
                                        if inst.admin_up])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

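        # node_res.payload maps LV names to status tuples; an instance whose
        # LV is reported as not online needs its disks re-activated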
        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3254
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
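        # blockdev_getsize reports sizes in bytes; convert to MiB, the unit
        # used for disk sizes in the configuration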
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
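      # Re-activate the master IP on the (possibly new) address even if the
      # steps above failed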
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                                (netmask))


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

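    # Small helper applying DDM_ADD/DDM_REMOVE modifications to the hidden
    # and blacklisted OS lists handled below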
    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        self.LogWarning(msg)
        feedback_fn(msg)
      else:
        self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
         "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    self.rpc.call_node_activate_master_ip(master_params.name,
                                          master_params)


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    self.rpc.call_node_deactivate_master_ip(master_params.name, master_params)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

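      # Only count a device as degraded if it is degraded and no resync is
      # currently in progress for it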
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

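          # Stagger power-on commands: wait the configured delay before
          # powering on the next node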
          if (self.op.command == constants.OOB_POWER_ON and
4168
              idx < len(self.nodes) - 1):
4169
            time.sleep(self.op.power_delay)
4170

    
4171
    return ret
4172

    
4173
  def _CheckPayload(self, result):
4174
    """Checks if the payload is valid.
4175

4176
    @param result: RPC result
4177
    @raises errors.OpExecError: If payload is not valid
4178

4179
    """
4180
    errs = []
4181
    if self.op.command == constants.OOB_HEALTH:
4182
      if not isinstance(result.payload, list):
4183
        errs.append("command 'health' is expected to return a list but got %s" %
4184
                    type(result.payload))
4185
      else:
4186
        for item, status in result.payload:
4187
          if status not in constants.OOB_STATUSES:
4188
            errs.append("health item '%s' has invalid status '%s'" %
4189
                        (item, status))
4190

    
4191
    if self.op.command == constants.OOB_POWER_STATUS:
4192
      if not isinstance(result.payload, dict):
4193
        errs.append("power-status is expected to return a dict but got %s" %
4194
                    type(result.payload))
4195

    
4196
    if self.op.command in [
4197
        constants.OOB_POWER_ON,
4198
        constants.OOB_POWER_OFF,
4199
        constants.OOB_POWER_CYCLE,
4200
        ]:
4201
      if result.payload is not None:
4202
        errs.append("%s is expected to not return payload but got '%s'" %
4203
                    (self.op.command, result.payload))
4204

    
4205
    if errs:
4206
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4207
                               utils.CommaJoin(errs))
4208

    
4209

    
4210
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

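  # Note (added for clarity, not in the original source): the method below
  # consumes the {os_name: {node_name: [(path, status, diagnose, variants,
  # params, api_versions), ...]}} mapping built by _DiagnoseByOS, keeps only
  # the variants/parameters/API versions common to all nodes, and reports an
  # OS as valid only if its first entry is valid on every node.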
  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

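    # Illustrative example (not in the original source): for
    # fields=["name", "variants"] and names=["debian-etch"], the combined
    # filter returned below is roughly
    #   [qlang.OP_AND, <name filter for "debian-etch">,
    #    [qlang.OP_AND,
    #     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
    #     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
    #     [qlang.OP_TRUE, "valid"]]]
    # where the exact shape of the name filter is up to qlang.MakeSimpleFilter.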
    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
4464
  FIELDS = query.NODE_FIELDS
4465

    
4466
  def ExpandNames(self, lu):
4467
    lu.needed_locks = {}
4468
    lu.share_locks = _ShareAll()
4469

    
4470
    if self.names:
4471
      self.wanted = _GetWantedNodes(lu, self.names)
4472
    else:
4473
      self.wanted = locking.ALL_SET
4474

    
4475
    self.do_locking = (self.use_locking and
4476
                       query.NQ_LIVE in self.requested_data)
4477

    
4478
    if self.do_locking:
4479
      # If any non-static field is requested we need to lock the nodes
4480
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
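      # Note (added for clarity, not in the original source): only a live-data
      # query with use_locking=True reaches this point; a purely static query
      # (e.g. just "name") keeps needed_locks empty and is answered from the
      # configuration alone.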
4481

    
4482
  def DeclareLocks(self, lu, level):
4483
    pass
4484

    
4485
  def _GetQueryData(self, lu):
4486
    """Computes the list of nodes and their attributes.
4487

4488
    """
4489
    all_info = lu.cfg.GetAllNodesInfo()
4490

    
4491
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4492

    
4493
    # Gather data as requested
4494
    if query.NQ_LIVE in self.requested_data:
4495
      # filter out non-vm_capable nodes
4496
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4497

    
4498
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4499
                                        lu.cfg.GetHypervisorType())
4500
      live_data = dict((name, nresult.payload)
4501
                       for (name, nresult) in node_data.items()
4502
                       if not nresult.fail_msg and nresult.payload)
4503
    else:
4504
      live_data = None
4505

    
4506
    if query.NQ_INST in self.requested_data:
4507
      node_to_primary = dict([(name, set()) for name in nodenames])
4508
      node_to_secondary = dict([(name, set()) for name in nodenames])
4509

    
4510
      inst_data = lu.cfg.GetAllInstancesInfo()
4511

    
4512
      for inst in inst_data.values():
4513
        if inst.primary_node in node_to_primary:
4514
          node_to_primary[inst.primary_node].add(inst.name)
4515
        for secnode in inst.secondary_nodes:
4516
          if secnode in node_to_secondary:
4517
            node_to_secondary[secnode].add(inst.name)
4518
    else:
4519
      node_to_primary = None
4520
      node_to_secondary = None
4521

    
4522
    if query.NQ_OOB in self.requested_data:
4523
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4524
                         for name, node in all_info.iteritems())
4525
    else:
4526
      oob_support = None
4527

    
4528
    if query.NQ_GROUP in self.requested_data:
4529
      groups = lu.cfg.GetAllNodeGroupsInfo()
4530
    else:
4531
      groups = {}
4532

    
4533
    return query.NodeQueryData([all_info[name] for name in nodenames],
4534
                               live_data, lu.cfg.GetMasterNode(),
4535
                               node_to_primary, node_to_secondary, groups,
4536
                               oob_support, lu.cfg.GetClusterInfo())
4537

    
4538

    
4539
class LUNodeQuery(NoHooksLU):
4540
  """Logical unit for querying nodes.
4541

4542
  """
4543
  # pylint: disable=W0142
4544
  REQ_BGL = False
4545

    
4546
  def CheckArguments(self):
4547
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4548
                         self.op.output_fields, self.op.use_locking)
4549

    
4550
  def ExpandNames(self):
4551
    self.nq.ExpandNames(self)
4552

    
4553
  def Exec(self, feedback_fn):
4554
    return self.nq.OldStyleQuery(self)
4555

    
4556

    
4557
class LUNodeQueryvols(NoHooksLU):
4558
  """Logical unit for getting volumes on node(s).
4559

4560
  """
4561
  REQ_BGL = False
4562
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4563
  _FIELDS_STATIC = utils.FieldSet("node")
4564

    
4565
  def CheckArguments(self):
4566
    _CheckOutputFields(static=self._FIELDS_STATIC,
4567
                       dynamic=self._FIELDS_DYNAMIC,
4568
                       selected=self.op.output_fields)
4569

    
4570
  def ExpandNames(self):
4571
    self.needed_locks = {}
4572
    self.share_locks[locking.LEVEL_NODE] = 1
4573
    if not self.op.nodes:
4574
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4575
    else:
4576
      self.needed_locks[locking.LEVEL_NODE] = \
4577
        _GetWantedNodes(self, self.op.nodes)
4578

    
4579
  def Exec(self, feedback_fn):
4580
    """Computes the list of nodes and their attributes.
4581

4582
    """
4583
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4584
    volumes = self.rpc.call_node_volumes(nodenames)
4585

    
4586
    ilist = self.cfg.GetAllInstancesInfo()
4587
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4588

    
4589
    output = []
4590
    for node in nodenames:
4591
      nresult = volumes[node]
4592
      if nresult.offline:
4593
        continue
4594
      msg = nresult.fail_msg
4595
      if msg:
4596
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4597
        continue
4598

    
4599
      node_vols = sorted(nresult.payload,
4600
                         key=operator.itemgetter("dev"))
4601

    
4602
      for vol in node_vols:
4603
        node_output = []
4604
        for field in self.op.output_fields:
4605
          if field == "node":
4606
            val = node
4607
          elif field == "phys":
4608
            val = vol["dev"]
4609
          elif field == "vg":
4610
            val = vol["vg"]
4611
          elif field == "name":
4612
            val = vol["name"]
4613
          elif field == "size":
4614
            val = int(float(vol["size"]))
4615
          elif field == "instance":
4616
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4617
          else:
4618
            raise errors.ParameterError(field)
4619
          node_output.append(str(val))
4620

    
4621
        output.append(node_output)
4622

    
4623
    return output
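    # Illustrative example (not in the original source): for
    # output_fields=["node", "vg", "name", "size"] a returned row might look
    # like ["node1.example.com", "xenvg", "disk0", "10240"]; note that every
    # value is stringified above.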
4624

    
4625

    
4626
class LUNodeQueryStorage(NoHooksLU):
4627
  """Logical unit for getting information on storage units on node(s).
4628

4629
  """
4630
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4631
  REQ_BGL = False
4632

    
4633
  def CheckArguments(self):
4634
    _CheckOutputFields(static=self._FIELDS_STATIC,
4635
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4636
                       selected=self.op.output_fields)
4637

    
4638
  def ExpandNames(self):
4639
    self.needed_locks = {}
4640
    self.share_locks[locking.LEVEL_NODE] = 1
4641

    
4642
    if self.op.nodes:
4643
      self.needed_locks[locking.LEVEL_NODE] = \
4644
        _GetWantedNodes(self, self.op.nodes)
4645
    else:
4646
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4647

    
4648
  def Exec(self, feedback_fn):
4649
    """Computes the list of nodes and their attributes.
4650

4651
    """
4652
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4653

    
4654
    # Always get name to sort by
4655
    if constants.SF_NAME in self.op.output_fields:
4656
      fields = self.op.output_fields[:]
4657
    else:
4658
      fields = [constants.SF_NAME] + self.op.output_fields
4659

    
4660
    # Never ask for node or type as it's only known to the LU
4661
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4662
      while extra in fields:
4663
        fields.remove(extra)
4664

    
4665
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4666
    name_idx = field_idx[constants.SF_NAME]
4667

    
4668
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4669
    data = self.rpc.call_storage_list(self.nodes,
4670
                                      self.op.storage_type, st_args,
4671
                                      self.op.name, fields)
4672

    
4673
    result = []
4674

    
4675
    for node in utils.NiceSort(self.nodes):
4676
      nresult = data[node]
4677
      if nresult.offline:
4678
        continue
4679

    
4680
      msg = nresult.fail_msg
4681
      if msg:
4682
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4683
        continue
4684

    
4685
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4686

    
4687
      for name in utils.NiceSort(rows.keys()):
4688
        row = rows[name]
4689

    
4690
        out = []
4691

    
4692
        for field in self.op.output_fields:
4693
          if field == constants.SF_NODE:
4694
            val = node
4695
          elif field == constants.SF_TYPE:
4696
            val = self.op.storage_type
4697
          elif field in field_idx:
4698
            val = row[field_idx[field]]
4699
          else:
4700
            raise errors.ParameterError(field)
4701

    
4702
          out.append(val)
4703

    
4704
        result.append(out)
4705

    
4706
    return result
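    # Illustrative example (not in the original source): for an LVM volume
    # group query with output_fields=["node", "name", "size"], a returned row
    # might look like ["node1.example.com", "xenvg", 409600].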
4707

    
4708

    
4709
class _InstanceQuery(_QueryBase):
4710
  FIELDS = query.INSTANCE_FIELDS
4711

    
4712
  def ExpandNames(self, lu):
4713
    lu.needed_locks = {}
4714
    lu.share_locks = _ShareAll()
4715

    
4716
    if self.names:
4717
      self.wanted = _GetWantedInstances(lu, self.names)
4718
    else:
4719
      self.wanted = locking.ALL_SET
4720

    
4721
    self.do_locking = (self.use_locking and
4722
                       query.IQ_LIVE in self.requested_data)
4723
    if self.do_locking:
4724
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4725
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4726
      lu.needed_locks[locking.LEVEL_NODE] = []
4727
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4728

    
4729
    self.do_grouplocks = (self.do_locking and
4730
                          query.IQ_NODES in self.requested_data)
4731

    
4732
  def DeclareLocks(self, lu, level):
4733
    if self.do_locking:
4734
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4735
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4736

    
4737
        # Lock all groups used by instances optimistically; this requires going
4738
        # via the node before it's locked, requiring verification later on
4739
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4740
          set(group_uuid
4741
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4742
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4743
      elif level == locking.LEVEL_NODE:
4744
        lu._LockInstancesNodes() # pylint: disable=W0212
4745

    
4746
  @staticmethod
4747
  def _CheckGroupLocks(lu):
4748
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4749
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4750

    
4751
    # Check if node groups for locked instances are still correct
4752
    for instance_name in owned_instances:
4753
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4754

    
4755
  def _GetQueryData(self, lu):
4756
    """Computes the list of instances and their attributes.
4757

4758
    """
4759
    if self.do_grouplocks:
4760
      self._CheckGroupLocks(lu)
4761

    
4762
    cluster = lu.cfg.GetClusterInfo()
4763
    all_info = lu.cfg.GetAllInstancesInfo()
4764

    
4765
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4766

    
4767
    instance_list = [all_info[name] for name in instance_names]
4768
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4769
                                        for inst in instance_list)))
4770
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4771
    bad_nodes = []
4772
    offline_nodes = []
4773
    wrongnode_inst = set()
4774

    
4775
    # Gather data as requested
4776
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4777
      live_data = {}
4778
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4779
      for name in nodes:
4780
        result = node_data[name]
4781
        if result.offline:
4782
          # offline nodes will be in both lists
4783
          assert result.fail_msg
4784
          offline_nodes.append(name)
4785
        if result.fail_msg:
4786
          bad_nodes.append(name)
4787
        elif result.payload:
4788
          for inst in result.payload:
4789
            if inst in all_info:
4790
              if all_info[inst].primary_node == name:
4791
                live_data.update(result.payload)
4792
              else:
4793
                wrongnode_inst.add(inst)
4794
            else:
4795
              # orphan instance; we don't list it here as we don't
4796
              # handle this case yet in the output of instance listing
4797
              logging.warning("Orphan instance '%s' found on node %s",
4798
                              inst, name)
4799
        # else no instance is alive
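        # Bookkeeping summary (added for clarity, not in the original source):
        # an offline node ends up in both offline_nodes and bad_nodes, a node
        # that merely failed the RPC only in bad_nodes, and instances reported
        # by a node other than their configured primary are collected in
        # wrongnode_inst.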
4800
    else:
4801
      live_data = {}
4802

    
4803
    if query.IQ_DISKUSAGE in self.requested_data:
4804
      disk_usage = dict((inst.name,
4805
                         _ComputeDiskSize(inst.disk_template,
4806
                                          [{constants.IDISK_SIZE: disk.size}
4807
                                           for disk in inst.disks]))
4808
                        for inst in instance_list)
4809
    else:
4810
      disk_usage = None
4811

    
4812
    if query.IQ_CONSOLE in self.requested_data:
4813
      consinfo = {}
4814
      for inst in instance_list:
4815
        if inst.name in live_data:
4816
          # Instance is running
4817
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4818
        else:
4819
          consinfo[inst.name] = None
4820
      assert set(consinfo.keys()) == set(instance_names)
4821
    else:
4822
      consinfo = None
4823

    
4824
    if query.IQ_NODES in self.requested_data:
4825
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4826
                                            instance_list)))
4827
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4828
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4829
                    for uuid in set(map(operator.attrgetter("group"),
4830
                                        nodes.values())))
4831
    else:
4832
      nodes = None
4833
      groups = None
4834

    
4835
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4836
                                   disk_usage, offline_nodes, bad_nodes,
4837
                                   live_data, wrongnode_inst, consinfo,
4838
                                   nodes, groups)
4839

    
4840

    
4841
class LUQuery(NoHooksLU):
4842
  """Query for resources/items of a certain kind.
4843

4844
  """
4845
  # pylint: disable=W0142
4846
  REQ_BGL = False
4847

    
4848
  def CheckArguments(self):
4849
    qcls = _GetQueryImplementation(self.op.what)
4850

    
4851
    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4852

    
4853
  def ExpandNames(self):
4854
    self.impl.ExpandNames(self)
4855

    
4856
  def DeclareLocks(self, level):
4857
    self.impl.DeclareLocks(self, level)
4858

    
4859
  def Exec(self, feedback_fn):
4860
    return self.impl.NewStyleQuery(self)
4861

    
4862

    
4863
class LUQueryFields(NoHooksLU):
4864
  """Query for resources/items of a certain kind.
4865

4866
  """
4867
  # pylint: disable=W0142
4868
  REQ_BGL = False
4869

    
4870
  def CheckArguments(self):
4871
    self.qcls = _GetQueryImplementation(self.op.what)
4872

    
4873
  def ExpandNames(self):
4874
    self.needed_locks = {}
4875

    
4876
  def Exec(self, feedback_fn):
4877
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4878

    
4879

    
4880
class LUNodeModifyStorage(NoHooksLU):
4881
  """Logical unit for modifying a storage volume on a node.
4882

4883
  """
4884
  REQ_BGL = False
4885

    
4886
  def CheckArguments(self):
4887
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4888

    
4889
    storage_type = self.op.storage_type
4890

    
4891
    try:
4892
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4893
    except KeyError:
4894
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4895
                                 " modified" % storage_type,
4896
                                 errors.ECODE_INVAL)
4897

    
4898
    diff = set(self.op.changes.keys()) - modifiable
4899
    if diff:
4900
      raise errors.OpPrereqError("The following fields can not be modified for"
4901
                                 " storage units of type '%s': %r" %
4902
                                 (storage_type, list(diff)),
4903
                                 errors.ECODE_INVAL)
4904

    
4905
  def ExpandNames(self):
4906
    self.needed_locks = {
4907
      locking.LEVEL_NODE: self.op.node_name,
4908
      }
4909

    
4910
  def Exec(self, feedback_fn):
4911
    """Computes the list of nodes and their attributes.
4912

4913
    """
4914
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4915
    result = self.rpc.call_storage_modify(self.op.node_name,
4916
                                          self.op.storage_type, st_args,
4917
                                          self.op.name, self.op.changes)
4918
    result.Raise("Failed to modify storage unit '%s' on %s" %
4919
                 (self.op.name, self.op.node_name))
4920

    
4921

    
4922
class LUNodeAdd(LogicalUnit):
4923
  """Logical unit for adding node to the cluster.
4924

4925
  """
4926
  HPATH = "node-add"
4927
  HTYPE = constants.HTYPE_NODE
4928
  _NFLAGS = ["master_capable", "vm_capable"]
4929

    
4930
  def CheckArguments(self):
4931
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4932
    # validate/normalize the node name
4933
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4934
                                         family=self.primary_ip_family)
4935
    self.op.node_name = self.hostname.name
4936

    
4937
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4938
      raise errors.OpPrereqError("Cannot readd the master node",
4939
                                 errors.ECODE_STATE)
4940

    
4941
    if self.op.readd and self.op.group:
4942
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4943
                                 " being readded", errors.ECODE_INVAL)
4944

    
4945
  def BuildHooksEnv(self):
4946
    """Build hooks env.
4947

4948
    This will run on all nodes before, and on all nodes + the new node after.
4949

4950
    """
4951
    return {
4952
      "OP_TARGET": self.op.node_name,
4953
      "NODE_NAME": self.op.node_name,
4954
      "NODE_PIP": self.op.primary_ip,
4955
      "NODE_SIP": self.op.secondary_ip,
4956
      "MASTER_CAPABLE": str(self.op.master_capable),
4957
      "VM_CAPABLE": str(self.op.vm_capable),
4958
      }
4959

    
4960
  def BuildHooksNodes(self):
4961
    """Build hooks nodes.
4962

4963
    """
4964
    # Exclude added node
4965
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4966
    post_nodes = pre_nodes + [self.op.node_name, ]
4967

    
4968
    return (pre_nodes, post_nodes)
4969

    
4970
  def CheckPrereq(self):
4971
    """Check prerequisites.
4972

4973
    This checks:
4974
     - the new node is not already in the config
4975
     - it is resolvable
4976
     - its parameters (single/dual homed) matches the cluster
4977

4978
    Any errors are signaled by raising errors.OpPrereqError.
4979

4980
    """
4981
    cfg = self.cfg
4982
    hostname = self.hostname
4983
    node = hostname.name
4984
    primary_ip = self.op.primary_ip = hostname.ip
4985
    if self.op.secondary_ip is None:
4986
      if self.primary_ip_family == netutils.IP6Address.family:
4987
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4988
                                   " IPv4 address must be given as secondary",
4989
                                   errors.ECODE_INVAL)
4990
      self.op.secondary_ip = primary_ip
4991

    
4992
    secondary_ip = self.op.secondary_ip
4993
    if not netutils.IP4Address.IsValid(secondary_ip):
4994
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4995
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4996

    
4997
    node_list = cfg.GetNodeList()
4998
    if not self.op.readd and node in node_list:
4999
      raise errors.OpPrereqError("Node %s is already in the configuration" %
5000
                                 node, errors.ECODE_EXISTS)
5001
    elif self.op.readd and node not in node_list:
5002
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5003
                                 errors.ECODE_NOENT)
5004

    
5005
    self.changed_primary_ip = False
5006

    
5007
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5008
      if self.op.readd and node == existing_node_name:
5009
        if existing_node.secondary_ip != secondary_ip:
5010
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
5011
                                     " address configuration as before",
5012
                                     errors.ECODE_INVAL)
5013
        if existing_node.primary_ip != primary_ip:
5014
          self.changed_primary_ip = True
5015

    
5016
        continue
5017

    
5018
      if (existing_node.primary_ip == primary_ip or
5019
          existing_node.secondary_ip == primary_ip or
5020
          existing_node.primary_ip == secondary_ip or
5021
          existing_node.secondary_ip == secondary_ip):
5022
        raise errors.OpPrereqError("New node ip address(es) conflict with"
5023
                                   " existing node %s" % existing_node.name,
5024
                                   errors.ECODE_NOTUNIQUE)
5025

    
5026
    # After this 'if' block, None is no longer a valid value for the
5027
    # _capable op attributes
5028
    if self.op.readd:
5029
      old_node = self.cfg.GetNodeInfo(node)
5030
      assert old_node is not None, "Can't retrieve locked node %s" % node
5031
      for attr in self._NFLAGS:
5032
        if getattr(self.op, attr) is None:
5033
          setattr(self.op, attr, getattr(old_node, attr))
5034
    else:
5035
      for attr in self._NFLAGS:
5036
        if getattr(self.op, attr) is None:
5037
          setattr(self.op, attr, True)
5038

    
5039
    if self.op.readd and not self.op.vm_capable:
5040
      pri, sec = cfg.GetNodeInstances(node)
5041
      if pri or sec:
5042
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5043
                                   " flag set to false, but it already holds"
5044
                                   " instances" % node,
5045
                                   errors.ECODE_STATE)
5046

    
5047
    # check that the type of the node (single versus dual homed) is the
5048
    # same as for the master
5049
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5050
    master_singlehomed = myself.secondary_ip == myself.primary_ip
5051
    newbie_singlehomed = secondary_ip == primary_ip
5052
    if master_singlehomed != newbie_singlehomed:
5053
      if master_singlehomed:
5054
        raise errors.OpPrereqError("The master has no secondary ip but the"
5055
                                   " new node has one",
5056
                                   errors.ECODE_INVAL)
5057
      else:
5058
        raise errors.OpPrereqError("The master has a secondary ip but the"
5059
                                   " new node doesn't have one",
5060
                                   errors.ECODE_INVAL)
5061

    
5062
    # checks reachability
5063
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5064
      raise errors.OpPrereqError("Node not reachable by ping",
5065
                                 errors.ECODE_ENVIRON)
5066

    
5067
    if not newbie_singlehomed:
5068
      # check reachability from my secondary ip to newbie's secondary ip
5069
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5070
                           source=myself.secondary_ip):
5071
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5072
                                   " based ping to node daemon port",
5073
                                   errors.ECODE_ENVIRON)
5074

    
5075
    if self.op.readd:
5076
      exceptions = [node]
5077
    else:
5078
      exceptions = []
5079

    
5080
    if self.op.master_capable:
5081
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5082
    else:
5083
      self.master_candidate = False
5084

    
5085
    if self.op.readd:
5086
      self.new_node = old_node
5087
    else:
5088
      node_group = cfg.LookupNodeGroup(self.op.group)
5089
      self.new_node = objects.Node(name=node,
5090
                                   primary_ip=primary_ip,
5091
                                   secondary_ip=secondary_ip,
5092
                                   master_candidate=self.master_candidate,
5093
                                   offline=False, drained=False,
5094
                                   group=node_group)
5095

    
5096
    if self.op.ndparams:
5097
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5098

    
5099
  def Exec(self, feedback_fn):
5100
    """Adds the new node to the cluster.
5101

5102
    """
5103
    new_node = self.new_node
5104
    node = new_node.name
5105

    
5106
    # We are adding a new node, so we assume it's powered
5107
    new_node.powered = True
5108

    
5109
    # for re-adds, reset the offline/drained/master-candidate flags;
5110
    # we need to reset here, otherwise offline would prevent RPC calls
5111
    # later in the procedure; this also means that if the re-add
5112
    # fails, we are left with a non-offlined, broken node
5113
    if self.op.readd:
5114
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5115
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5116
      # if we demote the node, we do cleanup later in the procedure
5117
      new_node.master_candidate = self.master_candidate
5118
      if self.changed_primary_ip:
5119
        new_node.primary_ip = self.op.primary_ip
5120

    
5121
    # copy the master/vm_capable flags
5122
    for attr in self._NFLAGS:
5123
      setattr(new_node, attr, getattr(self.op, attr))
5124

    
5125
    # notify the user about any possible mc promotion
5126
    if new_node.master_candidate:
5127
      self.LogInfo("Node will be a master candidate")
5128

    
5129
    if self.op.ndparams:
5130
      new_node.ndparams = self.op.ndparams
5131
    else:
5132
      new_node.ndparams = {}
5133

    
5134
    # check connectivity
5135
    result = self.rpc.call_version([node])[node]
5136
    result.Raise("Can't get version information from node %s" % node)
5137
    if constants.PROTOCOL_VERSION == result.payload:
5138
      logging.info("Communication to node %s fine, sw version %s match",
5139
                   node, result.payload)
5140
    else:
5141
      raise errors.OpExecError("Version mismatch master version %s,"
5142
                               " node version %s" %
5143
                               (constants.PROTOCOL_VERSION, result.payload))
5144

    
5145
    # Add node to our /etc/hosts, and add key to known_hosts
5146
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5147
      master_node = self.cfg.GetMasterNode()
5148
      result = self.rpc.call_etc_hosts_modify(master_node,
5149
                                              constants.ETC_HOSTS_ADD,
5150
                                              self.hostname.name,
5151
                                              self.hostname.ip)
5152
      result.Raise("Can't update hosts file with new host data")
5153

    
5154
    if new_node.secondary_ip != new_node.primary_ip:
5155
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5156
                               False)
5157

    
5158
    node_verify_list = [self.cfg.GetMasterNode()]
5159
    node_verify_param = {
5160
      constants.NV_NODELIST: ([node], {}),
5161
      # TODO: do a node-net-test as well?
5162
    }
5163

    
5164
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5165
                                       self.cfg.GetClusterName())
5166
    for verifier in node_verify_list:
5167
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5168
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5169
      if nl_payload:
5170
        for failed in nl_payload:
5171
          feedback_fn("ssh/hostname verification failed"
5172
                      " (checking from %s): %s" %
5173
                      (verifier, nl_payload[failed]))
5174
        raise errors.OpExecError("ssh/hostname verification failed")
5175

    
5176
    if self.op.readd:
5177
      _RedistributeAncillaryFiles(self)
5178
      self.context.ReaddNode(new_node)
5179
      # make sure we redistribute the config
5180
      self.cfg.Update(new_node, feedback_fn)
5181
      # and make sure the new node will not have old files around
5182
      if not new_node.master_candidate:
5183
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5184
        msg = result.fail_msg
5185
        if msg:
5186
          self.LogWarning("Node failed to demote itself from master"
5187
                          " candidate status: %s" % msg)
5188
    else:
5189
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5190
                                  additional_vm=self.op.vm_capable)
5191
      self.context.AddNode(new_node, self.proc.GetECId())
5192

    
5193

    
5194
class LUNodeSetParams(LogicalUnit):
5195
  """Modifies the parameters of a node.
5196

5197
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5198
      to the node role (as _ROLE_*)
5199
  @cvar _R2F: a dictionary from node role to tuples of flags
5200
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5201

5202
  """
5203
  HPATH = "node-modify"
5204
  HTYPE = constants.HTYPE_NODE
5205
  REQ_BGL = False
5206
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5207
  _F2R = {
5208
    (True, False, False): _ROLE_CANDIDATE,
5209
    (False, True, False): _ROLE_DRAINED,
5210
    (False, False, True): _ROLE_OFFLINE,
5211
    (False, False, False): _ROLE_REGULAR,
5212
    }
5213
  _R2F = dict((v, k) for k, v in _F2R.items())
5214
  _FLAGS = ["master_candidate", "drained", "offline"]
5215

    
5216
  def CheckArguments(self):
5217
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5218
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5219
                self.op.master_capable, self.op.vm_capable,
5220
                self.op.secondary_ip, self.op.ndparams]
5221
    if all_mods.count(None) == len(all_mods):
5222
      raise errors.OpPrereqError("Please pass at least one modification",
5223
                                 errors.ECODE_INVAL)
5224
    if all_mods.count(True) > 1:
5225
      raise errors.OpPrereqError("Can't set the node into more than one"
5226
                                 " state at the same time",
5227
                                 errors.ECODE_INVAL)
5228

    
5229
    # Boolean value that tells us whether we might be demoting from MC
5230
    self.might_demote = (self.op.master_candidate == False or
5231
                         self.op.offline == True or
5232
                         self.op.drained == True or
5233
                         self.op.master_capable == False)
5234

    
5235
    if self.op.secondary_ip:
5236
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5237
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5238
                                   " address" % self.op.secondary_ip,
5239
                                   errors.ECODE_INVAL)
5240

    
5241
    self.lock_all = self.op.auto_promote and self.might_demote
5242
    self.lock_instances = self.op.secondary_ip is not None
5243

    
5244
  def ExpandNames(self):
5245
    if self.lock_all:
5246
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5247
    else:
5248
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5249

    
5250
    if self.lock_instances:
5251
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5252

    
5253
  def DeclareLocks(self, level):
5254
    # If we have locked all instances, before waiting to lock nodes, release
5255
    # all the ones living on nodes unrelated to the current operation.
5256
    if level == locking.LEVEL_NODE and self.lock_instances:
5257
      self.affected_instances = []
5258
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5259
        instances_keep = []
5260

    
5261
        # Build list of instances to release
5262
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5263
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5264
          if (instance.disk_template in constants.DTS_INT_MIRROR and
5265
              self.op.node_name in instance.all_nodes):
5266
            instances_keep.append(instance_name)
5267
            self.affected_instances.append(instance)
5268

    
5269
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5270

    
5271
        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5272
                set(instances_keep))
5273

    
5274
  def BuildHooksEnv(self):
5275
    """Build hooks env.
5276

5277
    This runs on the master node.
5278

5279
    """
5280
    return {
5281
      "OP_TARGET": self.op.node_name,
5282
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5283
      "OFFLINE": str(self.op.offline),
5284
      "DRAINED": str(self.op.drained),
5285
      "MASTER_CAPABLE": str(self.op.master_capable),
5286
      "VM_CAPABLE": str(self.op.vm_capable),
5287
      }
5288

    
5289
  def BuildHooksNodes(self):
5290
    """Build hooks nodes.
5291

5292
    """
5293
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5294
    return (nl, nl)
5295

    
5296
  def CheckPrereq(self):
5297
    """Check prerequisites.
5298

5299
    This only checks the instance list against the existing names.
5300

5301
    """
5302
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5303

    
5304
    if (self.op.master_candidate is not None or
5305
        self.op.drained is not None or
5306
        self.op.offline is not None):
5307
      # we can't change the master's node flags
5308
      if self.op.node_name == self.cfg.GetMasterNode():
5309
        raise errors.OpPrereqError("The master role can be changed"
5310
                                   " only via master-failover",
5311
                                   errors.ECODE_INVAL)
5312

    
5313
    if self.op.master_candidate and not node.master_capable:
5314
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5315
                                 " it a master candidate" % node.name,
5316
                                 errors.ECODE_STATE)
5317

    
5318
    if self.op.vm_capable == False:
5319
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5320
      if ipri or isec:
5321
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5322
                                   " the vm_capable flag" % node.name,
5323
                                   errors.ECODE_STATE)
5324

    
5325
    if node.master_candidate and self.might_demote and not self.lock_all:
5326
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5327
      # check if after removing the current node, we're missing master
5328
      # candidates
5329
      (mc_remaining, mc_should, _) = \
5330
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5331
      if mc_remaining < mc_should:
5332
        raise errors.OpPrereqError("Not enough master candidates, please"
5333
                                   " pass auto promote option to allow"
5334
                                   " promotion", errors.ECODE_STATE)
5335

    
5336
    self.old_flags = old_flags = (node.master_candidate,
5337
                                  node.drained, node.offline)
5338
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5339
    self.old_role = old_role = self._F2R[old_flags]
5340

    
5341
    # Check for ineffective changes
5342
    for attr in self._FLAGS:
5343
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5344
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5345
        setattr(self.op, attr, None)
5346

    
5347
    # Past this point, any flag change to False means a transition
5348
    # away from the respective state, as only real changes are kept
5349

    
5350
    # TODO: We might query the real power state if it supports OOB
5351
    if _SupportsOob(self.cfg, node):
5352
      if self.op.offline is False and not (node.powered or
5353
                                           self.op.powered == True):
5354
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5355
                                    " offline status can be reset") %
5356
                                   self.op.node_name)
5357
    elif self.op.powered is not None:
5358
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5359
                                  " as it does not support out-of-band"
5360
                                  " handling") % self.op.node_name)
5361

    
5362
    # If we're being deofflined/drained, we'll MC ourself if needed
5363
    if (self.op.drained == False or self.op.offline == False or
5364
        (self.op.master_capable and not node.master_capable)):
5365
      if _DecideSelfPromotion(self):
5366
        self.op.master_candidate = True
5367
        self.LogInfo("Auto-promoting node to master candidate")
5368

    
5369
    # If we're no longer master capable, we'll demote ourselves from MC
5370
    if self.op.master_capable == False and node.master_candidate:
5371
      self.LogInfo("Demoting from master candidate")
5372
      self.op.master_candidate = False
5373

    
5374
    # Compute new role
5375
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5376
    if self.op.master_candidate:
5377
      new_role = self._ROLE_CANDIDATE
5378
    elif self.op.drained:
5379
      new_role = self._ROLE_DRAINED
5380
    elif self.op.offline:
5381
      new_role = self._ROLE_OFFLINE
5382
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5383
      # False is still in new flags, which means we're un-setting (the
5384
      # only) True flag
5385
      new_role = self._ROLE_REGULAR
5386
    else: # no new flags, nothing, keep old role
5387
      new_role = old_role
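    # Example (added for clarity, not in the original source): unsetting
    # "drained" on a drained node arrives here with self.op.drained == False,
    # so the chain above picks _ROLE_REGULAR; if no flag was changed at all,
    # the old role is kept.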
5388

    
5389
    self.new_role = new_role
5390

    
5391
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5392
      # Trying to transition out of offline status
5393
      # TODO: Use standard RPC runner, but make sure it works when the node is
5394
      # still marked offline
5395
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5396
      if result.fail_msg:
5397
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5398
                                   " to report its version: %s" %
5399
                                   (node.name, result.fail_msg),
5400
                                   errors.ECODE_STATE)
5401
      else:
5402
        self.LogWarning("Transitioning node from offline to online state"
5403
                        " without using re-add. Please make sure the node"
5404
                        " is healthy!")
5405

    
5406
    if self.op.secondary_ip:
5407
      # Ok even without locking, because this can't be changed by any LU
5408
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5409
      master_singlehomed = master.secondary_ip == master.primary_ip
5410
      if master_singlehomed and self.op.secondary_ip:
5411
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5412
                                   " homed cluster", errors.ECODE_INVAL)
5413

    
5414
      if node.offline:
5415
        if self.affected_instances:
5416
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5417
                                     " node has instances (%s) configured"
5418
                                     " to use it" % self.affected_instances)
5419
      else:
5420
        # On online nodes, check that no instances are running, and that
5421
        # the node has the new ip and we can reach it.
5422
        for instance in self.affected_instances:
5423
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5424

    
5425
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5426
        if master.name != node.name:
5427
          # check reachability from master secondary ip to new secondary ip
5428
          if not netutils.TcpPing(self.op.secondary_ip,
5429
                                  constants.DEFAULT_NODED_PORT,
5430
                                  source=master.secondary_ip):
5431
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5432
                                       " based ping to node daemon port",
5433
                                       errors.ECODE_ENVIRON)
5434

    
5435
    if self.op.ndparams:
5436
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5437
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5438
      self.new_ndparams = new_ndparams
5439

    
5440
  def Exec(self, feedback_fn):
5441
    """Modifies a node.
5442

5443
    """
5444
    node = self.node
5445
    old_role = self.old_role
5446
    new_role = self.new_role
5447

    
5448
    result = []
5449

    
5450
    if self.op.ndparams:
5451
      node.ndparams = self.new_ndparams
5452

    
5453
    if self.op.powered is not None:
5454
      node.powered = self.op.powered
5455

    
5456
    for attr in ["master_capable", "vm_capable"]:
5457
      val = getattr(self.op, attr)
5458
      if val is not None:
5459
        setattr(node, attr, val)
5460
        result.append((attr, str(val)))
5461

    
5462
    if new_role != old_role:
5463
      # Tell the node to demote itself, if no longer MC and not offline
5464
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5465
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5466
        if msg:
5467
          self.LogWarning("Node failed to demote itself: %s", msg)
5468

    
5469
      new_flags = self._R2F[new_role]
5470
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5471
        if of != nf:
5472
          result.append((desc, str(nf)))
5473
      (node.master_candidate, node.drained, node.offline) = new_flags
5474

    
5475
      # we locked all nodes, we adjust the CP before updating this node
5476
      if self.lock_all:
5477
        _AdjustCandidatePool(self, [node.name])
5478

    
5479
    if self.op.secondary_ip:
5480
      node.secondary_ip = self.op.secondary_ip
5481
      result.append(("secondary_ip", self.op.secondary_ip))
5482

    
5483
    # this will trigger configuration file update, if needed
5484
    self.cfg.Update(node, feedback_fn)
5485

    
5486
    # this will trigger job queue propagation or cleanup if the mc
5487
    # flag changed
5488
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5489
      self.context.ReaddNode(node)
5490

    
5491
    return result
5492

    
5493

    
5494
class LUNodePowercycle(NoHooksLU):
5495
  """Powercycles a node.
5496

5497
  """
5498
  REQ_BGL = False
5499

    
5500
  def CheckArguments(self):
5501
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5502
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5503
      raise errors.OpPrereqError("The node is the master and the force"
5504
                                 " parameter was not set",
5505
                                 errors.ECODE_INVAL)
5506

    
5507
  def ExpandNames(self):
5508
    """Locking for PowercycleNode.
5509

5510
    This is a last-resort option and shouldn't block on other
5511
    jobs. Therefore, we grab no locks.
5512

5513
    """
5514
    self.needed_locks = {}
5515

    
5516
  def Exec(self, feedback_fn):
5517
    """Reboots a node.
5518

5519
    """
5520
    result = self.rpc.call_node_powercycle(self.op.node_name,
5521
                                           self.cfg.GetHypervisorType())
5522
    result.Raise("Failed to schedule the reboot")
5523
    return result.payload
5524

    
5525

    
5526
class LUClusterQuery(NoHooksLU):
5527
  """Query cluster configuration.
5528

5529
  """
5530
  REQ_BGL = False
5531

    
5532
  def ExpandNames(self):
5533
    self.needed_locks = {}
5534

    
5535
  def Exec(self, feedback_fn):
5536
    """Return cluster config.
5537

5538
    """
5539
    cluster = self.cfg.GetClusterInfo()
5540
    os_hvp = {}
5541

    
5542
    # Filter just for enabled hypervisors
5543
    for os_name, hv_dict in cluster.os_hvp.items():
5544
      os_hvp[os_name] = {}
5545
      for hv_name, hv_params in hv_dict.items():
5546
        if hv_name in cluster.enabled_hypervisors:
5547
          os_hvp[os_name][hv_name] = hv_params
5548

    
5549
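    # Illustrative example (not in the original source): with only "kvm" in
    # cluster.enabled_hypervisors, an os_hvp entry such as
    #   {"debian-etch": {"kvm": {...}, "xen-pvm": {...}}}
    # is reduced above to {"debian-etch": {"kvm": {...}}}.
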
    # Convert ip_family to ip_version
5550
    primary_ip_version = constants.IP4_VERSION
5551
    if cluster.primary_ip_family == netutils.IP6Address.family:
5552
      primary_ip_version = constants.IP6_VERSION
5553

    
5554
    result = {
5555
      "software_version": constants.RELEASE_VERSION,
5556
      "protocol_version": constants.PROTOCOL_VERSION,
5557
      "config_version": constants.CONFIG_VERSION,
5558
      "os_api_version": max(constants.OS_API_VERSIONS),
5559
      "export_version": constants.EXPORT_VERSION,
5560
      "architecture": (platform.architecture()[0], platform.machine()),
5561
      "name": cluster.cluster_name,
5562
      "master": cluster.master_node,
5563
      "default_hypervisor": cluster.enabled_hypervisors[0],
5564
      "enabled_hypervisors": cluster.enabled_hypervisors,
5565
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5566
                        for hypervisor_name in cluster.enabled_hypervisors]),
5567
      "os_hvp": os_hvp,
5568
      "beparams": cluster.beparams,
5569
      "osparams": cluster.osparams,
5570
      "nicparams": cluster.nicparams,
5571
      "ndparams": cluster.ndparams,
5572
      "candidate_pool_size": cluster.candidate_pool_size,
5573
      "master_netdev": cluster.master_netdev,
5574
      "master_netmask": cluster.master_netmask,
5575
      "volume_group_name": cluster.volume_group_name,
5576
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5577
      "file_storage_dir": cluster.file_storage_dir,
5578
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5579
      "maintain_node_health": cluster.maintain_node_health,
5580
      "ctime": cluster.ctime,
5581
      "mtime": cluster.mtime,
5582
      "uuid": cluster.uuid,
5583
      "tags": list(cluster.GetTags()),
5584
      "uid_pool": cluster.uid_pool,
5585
      "default_iallocator": cluster.default_iallocator,
5586
      "reserved_lvs": cluster.reserved_lvs,
5587
      "primary_ip_version": primary_ip_version,
5588
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5589
      "hidden_os": cluster.hidden_os,
5590
      "blacklisted_os": cluster.blacklisted_os,
5591
      }
5592

    
5593
    return result
5594

    
5595

    
5596
class LUClusterConfigQuery(NoHooksLU):
5597
  """Return configuration values.
5598

5599
  """
5600
  REQ_BGL = False
5601
  _FIELDS_DYNAMIC = utils.FieldSet()
5602
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5603
                                  "watcher_pause", "volume_group_name")
5604

    
5605
  def CheckArguments(self):
5606
    _CheckOutputFields(static=self._FIELDS_STATIC,
5607
                       dynamic=self._FIELDS_DYNAMIC,
5608
                       selected=self.op.output_fields)
5609

    
5610
  def ExpandNames(self):
5611
    self.needed_locks = {}
5612

    
5613
  def Exec(self, feedback_fn):
5614
    """Dump a representation of the cluster config to the standard output.
5615

5616
    """
5617
    values = []
5618
    for field in self.op.output_fields:
5619
      if field == "cluster_name":
5620
        entry = self.cfg.GetClusterName()
5621
      elif field == "master_node":
5622
        entry = self.cfg.GetMasterNode()
5623
      elif field == "drain_flag":
5624
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5625
      elif field == "watcher_pause":
5626
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5627
      elif field == "volume_group_name":
5628
        entry = self.cfg.GetVGName()
5629
      else:
5630
        raise errors.ParameterError(field)
5631
      values.append(entry)
5632
    return values
5633

    
5634

    
5635
class LUInstanceActivateDisks(NoHooksLU):
5636
  """Bring up an instance's disks.
5637

5638
  """
5639
  REQ_BGL = False
5640

    
5641
  def ExpandNames(self):
5642
    self._ExpandAndLockInstance()
5643
    self.needed_locks[locking.LEVEL_NODE] = []
5644
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5645

    
5646
  def DeclareLocks(self, level):
5647
    if level == locking.LEVEL_NODE:
5648
      self._LockInstancesNodes()
5649

    
5650
  def CheckPrereq(self):
5651
    """Check prerequisites.
5652

5653
    This checks that the instance is in the cluster.
5654

5655
    """
5656
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5657
    assert self.instance is not None, \
5658
      "Cannot retrieve locked instance %s" % self.op.instance_name
5659
    _CheckNodeOnline(self, self.instance.primary_node)
5660

    
5661
  def Exec(self, feedback_fn):
5662
    """Activate the disks.
5663

5664
    """
5665
    disks_ok, disks_info = \
5666
              _AssembleInstanceDisks(self, self.instance,
5667
                                     ignore_size=self.op.ignore_size)
5668
    if not disks_ok:
5669
      raise errors.OpExecError("Cannot activate block devices")
5670

    
5671
    return disks_info
5672

    
5673

    
5674
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5675
                           ignore_size=False):
5676
  """Prepare the block devices for an instance.
5677

5678
  This sets up the block devices on all nodes.
5679

5680
  @type lu: L{LogicalUnit}
5681
  @param lu: the logical unit on whose behalf we execute
5682
  @type instance: L{objects.Instance}
5683
  @param instance: the instance for whose disks we assemble
5684
  @type disks: list of L{objects.Disk} or None
5685
  @param disks: which disks to assemble (or all, if None)
5686
  @type ignore_secondaries: boolean
5687
  @param ignore_secondaries: if true, errors on secondary nodes
5688
      won't result in an error return from the function
5689
  @type ignore_size: boolean
5690
  @param ignore_size: if true, the current known size of the disk
5691
      will not be used during the disk activation, useful for cases
5692
      when the size is wrong
5693
  @return: False if the operation failed, otherwise a list of
5694
      (host, instance_visible_name, node_visible_name)
5695
      with the mapping from node devices to instance devices
5696

5697
  """
5698
  device_info = []
5699
  disks_ok = True
5700
  iname = instance.name
5701
  disks = _ExpandCheckDisks(instance, disks)
5702

    
5703
  # With the two passes mechanism we try to reduce the window of
5704
  # opportunity for the race condition of switching DRBD to primary
5705
  # before handshaking occured, but we do not eliminate it
5706

    
5707
  # The proper fix would be to wait (with some limits) until the
5708
  # connection has been made and drbd transitions from WFConnection
5709
  # into any other network-connected state (Connected, SyncTarget,
5710
  # SyncSource, etc.)
5711

    
5712
  # 1st pass, assemble on all nodes in secondary mode
5713
  for idx, inst_disk in enumerate(disks):
5714
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5715
      if ignore_size:
5716
        node_disk = node_disk.Copy()
5717
        node_disk.UnsetSize()
5718
      lu.cfg.SetDiskID(node_disk, node)
5719
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5720
      msg = result.fail_msg
5721
      if msg:
5722
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5723
                           " (is_primary=False, pass=1): %s",
5724
                           inst_disk.iv_name, node, msg)
5725
        if not ignore_secondaries:
5726
          disks_ok = False
5727

    
5728
  # FIXME: race condition on drbd migration to primary
5729

    
5730
  # 2nd pass, do only the primary node
5731
  for idx, inst_disk in enumerate(disks):
5732
    dev_path = None
5733

    
5734
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5735
      if node != instance.primary_node:
5736
        continue
5737
      if ignore_size:
5738
        node_disk = node_disk.Copy()
5739
        node_disk.UnsetSize()
5740
      lu.cfg.SetDiskID(node_disk, node)
5741
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5742
      msg = result.fail_msg
5743
      if msg:
5744
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5745
                           " (is_primary=True, pass=2): %s",
5746
                           inst_disk.iv_name, node, msg)
5747
        disks_ok = False
5748
      else:
5749
        dev_path = result.payload
5750

    
5751
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5752

    
5753
  # leave the disks configured for the primary node
5754
  # this is a workaround that would be fixed better by
5755
  # improving the logical/physical id handling
5756
  for disk in disks:
5757
    lu.cfg.SetDiskID(disk, instance.primary_node)
5758

    
5759
  return disks_ok, device_info
5760

    
5761

    
5762
def _StartInstanceDisks(lu, instance, force):
5763
  """Start the disks of an instance.
5764

5765
  """
5766
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5767
                                           ignore_secondaries=force)
5768
  if not disks_ok:
5769
    _ShutdownInstanceDisks(lu, instance)
5770
    if force is not None and not force:
5771
      lu.proc.LogWarning("", hint="If the message above refers to a"
5772
                         " secondary node,"
5773
                         " you can retry the operation using '--force'.")
5774
    raise errors.OpExecError("Disk consistency error")
5775

    
5776

    
5777
class LUInstanceDeactivateDisks(NoHooksLU):
5778
  """Shutdown an instance's disks.
5779

5780
  """
5781
  REQ_BGL = False
5782

    
5783
  def ExpandNames(self):
5784
    self._ExpandAndLockInstance()
5785
    self.needed_locks[locking.LEVEL_NODE] = []
5786
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5787

    
5788
  def DeclareLocks(self, level):
5789
    if level == locking.LEVEL_NODE:
5790
      self._LockInstancesNodes()
5791

    
5792
  def CheckPrereq(self):
5793
    """Check prerequisites.
5794

5795
    This checks that the instance is in the cluster.
5796

5797
    """
5798
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5799
    assert self.instance is not None, \
5800
      "Cannot retrieve locked instance %s" % self.op.instance_name
5801

    
5802
  def Exec(self, feedback_fn):
5803
    """Deactivate the disks
5804

5805
    """
5806
    instance = self.instance
5807
    if self.op.force:
5808
      _ShutdownInstanceDisks(self, instance)
5809
    else:
5810
      _SafeShutdownInstanceDisks(self, instance)
5811

    
5812

    
5813
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5814
  """Shutdown block devices of an instance.
5815

5816
  This function checks if an instance is running, before calling
5817
  _ShutdownInstanceDisks.
5818

5819
  """
5820
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5821
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5822

    
5823

    
5824
def _ExpandCheckDisks(instance, disks):
5825
  """Return the instance disks selected by the disks list
5826

5827
  @type disks: list of L{objects.Disk} or None
5828
  @param disks: selected disks
5829
  @rtype: list of L{objects.Disk}
5830
  @return: selected instance disks to act on
5831

5832
  """
5833
  if disks is None:
5834
    return instance.disks
5835
  else:
5836
    if not set(disks).issubset(instance.disks):
5837
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5838
                                   " target instance")
5839
    return disks
5840

    
5841

    
5842
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5843
  """Shutdown block devices of an instance.
5844

5845
  This does the shutdown on all nodes of the instance.
5846

5847
  If the ignore_primary is false, errors on the primary node are
5848
  ignored.
5849

5850
  """
5851
  all_result = True
5852
  disks = _ExpandCheckDisks(instance, disks)
5853

    
5854
  for disk in disks:
5855
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5856
      lu.cfg.SetDiskID(top_disk, node)
5857
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5858
      msg = result.fail_msg
5859
      if msg:
5860
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5861
                      disk.iv_name, node, msg)
5862
        if ((node == instance.primary_node and not ignore_primary) or
5863
            (node != instance.primary_node and not result.offline)):
5864
          all_result = False
5865
  return all_result
5866

    
5867

    
5868
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5869
  """Checks if a node has enough free memory.
5870

5871
  This function check if a given node has the needed amount of free
5872
  memory. In case the node has less memory or we cannot get the
5873
  information from the node, this function raise an OpPrereqError
5874
  exception.
5875

5876
  @type lu: C{LogicalUnit}
5877
  @param lu: a logical unit from which we get configuration data
5878
  @type node: C{str}
5879
  @param node: the node to check
5880
  @type reason: C{str}
5881
  @param reason: string to use in the error message
5882
  @type requested: C{int}
5883
  @param requested: the amount of memory in MiB to check for
5884
  @type hypervisor_name: C{str}
5885
  @param hypervisor_name: the hypervisor to ask for memory stats
5886
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5887
      we cannot check the node
5888

5889
  """
5890
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5891
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5892
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5893
  free_mem = nodeinfo[node].payload.get("memory_free", None)
5894
  if not isinstance(free_mem, int):
5895
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5896
                               " was '%s'" % (node, free_mem),
5897
                               errors.ECODE_ENVIRON)
5898
  if requested > free_mem:
5899
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5900
                               " needed %s MiB, available %s MiB" %
5901
                               (node, reason, requested, free_mem),
5902
                               errors.ECODE_NORES)
5903

    
5904

    
5905
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5906
  """Checks if nodes have enough free disk space in the all VGs.
5907

5908
  This function check if all given nodes have the needed amount of
5909
  free disk. In case any node has less disk or we cannot get the
5910
  information from the node, this function raise an OpPrereqError
5911
  exception.
5912

5913
  @type lu: C{LogicalUnit}
5914
  @param lu: a logical unit from which we get configuration data
5915
  @type nodenames: C{list}
5916
  @param nodenames: the list of node names to check
5917
  @type req_sizes: C{dict}
5918
  @param req_sizes: the hash of vg and corresponding amount of disk in
5919
      MiB to check for
5920
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5921
      or we cannot check the node
5922

5923
  """
5924
  for vg, req_size in req_sizes.items():
5925
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5926

    
5927

    
5928
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5929
  """Checks if nodes have enough free disk space in the specified VG.
5930

5931
  This function check if all given nodes have the needed amount of
5932
  free disk. In case any node has less disk or we cannot get the
5933
  information from the node, this function raise an OpPrereqError
5934
  exception.
5935

5936
  @type lu: C{LogicalUnit}
5937
  @param lu: a logical unit from which we get configuration data
5938
  @type nodenames: C{list}
5939
  @param nodenames: the list of node names to check
5940
  @type vg: C{str}
5941
  @param vg: the volume group to check
5942
  @type requested: C{int}
5943
  @param requested: the amount of disk in MiB to check for
5944
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5945
      or we cannot check the node
5946

5947
  """
5948
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5949
  for node in nodenames:
5950
    info = nodeinfo[node]
5951
    info.Raise("Cannot get current information from node %s" % node,
5952
               prereq=True, ecode=errors.ECODE_ENVIRON)
5953
    vg_free = info.payload.get("vg_free", None)
5954
    if not isinstance(vg_free, int):
5955
      raise errors.OpPrereqError("Can't compute free disk space on node"
5956
                                 " %s for vg %s, result was '%s'" %
5957
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
5958
    if requested > vg_free:
5959
      raise errors.OpPrereqError("Not enough disk space on target node %s"
5960
                                 " vg %s: required %d MiB, available %d MiB" %
5961
                                 (node, vg, requested, vg_free),
5962
                                 errors.ECODE_NORES)
5963

    
5964

    
5965
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
5966
  """Checks if nodes have enough physical CPUs
5967

5968
  This function checks if all given nodes have the needed number of
5969
  physical CPUs. In case any node has less CPUs or we cannot get the
5970
  information from the node, this function raises an OpPrereqError
5971
  exception.
5972

5973
  @type lu: C{LogicalUnit}
5974
  @param lu: a logical unit from which we get configuration data
5975
  @type nodenames: C{list}
5976
  @param nodenames: the list of node names to check
5977
  @type requested: C{int}
5978
  @param requested: the minimum acceptable number of physical CPUs
5979
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
5980
      or we cannot check the node
5981

5982
  """
5983
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
5984
  for node in nodenames:
5985
    info = nodeinfo[node]
5986
    info.Raise("Cannot get current information from node %s" % node,
5987
               prereq=True, ecode=errors.ECODE_ENVIRON)
5988
    num_cpus = info.payload.get("cpu_total", None)
5989
    if not isinstance(num_cpus, int):
5990
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
5991
                                 " on node %s, result was '%s'" %
5992
                                 (node, num_cpus), errors.ECODE_ENVIRON)
5993
    if requested > num_cpus:
5994
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
5995
                                 "required" % (node, num_cpus, requested),
5996
                                 errors.ECODE_NORES)
5997

    
5998

    
5999
class LUInstanceStartup(LogicalUnit):
6000
  """Starts an instance.
6001

6002
  """
6003
  HPATH = "instance-start"
6004
  HTYPE = constants.HTYPE_INSTANCE
6005
  REQ_BGL = False
6006

    
6007
  def CheckArguments(self):
6008
    # extra beparams
6009
    if self.op.beparams:
6010
      # fill the beparams dict
6011
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6012

    
6013
  def ExpandNames(self):
6014
    self._ExpandAndLockInstance()
6015

    
6016
  def BuildHooksEnv(self):
6017
    """Build hooks env.
6018

6019
    This runs on master, primary and secondary nodes of the instance.
6020

6021
    """
6022
    env = {
6023
      "FORCE": self.op.force,
6024
      }
6025

    
6026
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6027

    
6028
    return env
6029

    
6030
  def BuildHooksNodes(self):
6031
    """Build hooks nodes.
6032

6033
    """
6034
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6035
    return (nl, nl)
6036

    
6037
  def CheckPrereq(self):
6038
    """Check prerequisites.
6039

6040
    This checks that the instance is in the cluster.
6041

6042
    """
6043
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6044
    assert self.instance is not None, \
6045
      "Cannot retrieve locked instance %s" % self.op.instance_name
6046

    
6047
    # extra hvparams
6048
    if self.op.hvparams:
6049
      # check hypervisor parameter syntax (locally)
6050
      cluster = self.cfg.GetClusterInfo()
6051
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6052
      filled_hvp = cluster.FillHV(instance)
6053
      filled_hvp.update(self.op.hvparams)
6054
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6055
      hv_type.CheckParameterSyntax(filled_hvp)
6056
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6057

    
6058
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6059

    
6060
    if self.primary_offline and self.op.ignore_offline_nodes:
6061
      self.proc.LogWarning("Ignoring offline primary node")
6062

    
6063
      if self.op.hvparams or self.op.beparams:
6064
        self.proc.LogWarning("Overridden parameters are ignored")
6065
    else:
6066
      _CheckNodeOnline(self, instance.primary_node)
6067

    
6068
      bep = self.cfg.GetClusterInfo().FillBE(instance)
6069

    
6070
      # check bridges existence
6071
      _CheckInstanceBridgesExist(self, instance)
6072

    
6073
      remote_info = self.rpc.call_instance_info(instance.primary_node,
6074
                                                instance.name,
6075
                                                instance.hypervisor)
6076
      remote_info.Raise("Error checking node %s" % instance.primary_node,
6077
                        prereq=True, ecode=errors.ECODE_ENVIRON)
6078
      if not remote_info.payload: # not running already
6079
        _CheckNodeFreeMemory(self, instance.primary_node,
6080
                             "starting instance %s" % instance.name,
6081
                             bep[constants.BE_MEMORY], instance.hypervisor)
6082

    
6083
  def Exec(self, feedback_fn):
6084
    """Start the instance.
6085

6086
    """
6087
    instance = self.instance
6088
    force = self.op.force
6089

    
6090
    if not self.op.no_remember:
6091
      self.cfg.MarkInstanceUp(instance.name)
6092

    
6093
    if self.primary_offline:
6094
      assert self.op.ignore_offline_nodes
6095
      self.proc.LogInfo("Primary node offline, marked instance as started")
6096
    else:
6097
      node_current = instance.primary_node
6098

    
6099
      _StartInstanceDisks(self, instance, force)
6100

    
6101
      result = \
6102
        self.rpc.call_instance_start(node_current,
6103
                                     (instance, self.op.hvparams,
6104
                                      self.op.beparams),
6105
                                     self.op.startup_paused)
6106
      msg = result.fail_msg
6107
      if msg:
6108
        _ShutdownInstanceDisks(self, instance)
6109
        raise errors.OpExecError("Could not start instance: %s" % msg)
6110

    
6111

    
6112
class LUInstanceReboot(LogicalUnit):
6113
  """Reboot an instance.
6114

6115
  """
6116
  HPATH = "instance-reboot"
6117
  HTYPE = constants.HTYPE_INSTANCE
6118
  REQ_BGL = False
6119

    
6120
  def ExpandNames(self):
6121
    self._ExpandAndLockInstance()
6122

    
6123
  def BuildHooksEnv(self):
6124
    """Build hooks env.
6125

6126
    This runs on master, primary and secondary nodes of the instance.
6127

6128
    """
6129
    env = {
6130
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6131
      "REBOOT_TYPE": self.op.reboot_type,
6132
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6133
      }
6134

    
6135
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6136

    
6137
    return env
6138

    
6139
  def BuildHooksNodes(self):
6140
    """Build hooks nodes.
6141

6142
    """
6143
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6144
    return (nl, nl)
6145

    
6146
  def CheckPrereq(self):
6147
    """Check prerequisites.
6148

6149
    This checks that the instance is in the cluster.
6150

6151
    """
6152
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6153
    assert self.instance is not None, \
6154
      "Cannot retrieve locked instance %s" % self.op.instance_name
6155

    
6156
    _CheckNodeOnline(self, instance.primary_node)
6157

    
6158
    # check bridges existence
6159
    _CheckInstanceBridgesExist(self, instance)
6160

    
6161
  def Exec(self, feedback_fn):
6162
    """Reboot the instance.
6163

6164
    """
6165
    instance = self.instance
6166
    ignore_secondaries = self.op.ignore_secondaries
6167
    reboot_type = self.op.reboot_type
6168

    
6169
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6170
                                              instance.name,
6171
                                              instance.hypervisor)
6172
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6173
    instance_running = bool(remote_info.payload)
6174

    
6175
    node_current = instance.primary_node
6176

    
6177
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6178
                                            constants.INSTANCE_REBOOT_HARD]:
6179
      for disk in instance.disks:
6180
        self.cfg.SetDiskID(disk, node_current)
6181
      result = self.rpc.call_instance_reboot(node_current, instance,
6182
                                             reboot_type,
6183
                                             self.op.shutdown_timeout)
6184
      result.Raise("Could not reboot instance")
6185
    else:
6186
      if instance_running:
6187
        result = self.rpc.call_instance_shutdown(node_current, instance,
6188
                                                 self.op.shutdown_timeout)
6189
        result.Raise("Could not shutdown instance for full reboot")
6190
        _ShutdownInstanceDisks(self, instance)
6191
      else:
6192
        self.LogInfo("Instance %s was already stopped, starting now",
6193
                     instance.name)
6194
      _StartInstanceDisks(self, instance, ignore_secondaries)
6195
      result = self.rpc.call_instance_start(node_current,
6196
                                            (instance, None, None), False)
6197
      msg = result.fail_msg
6198
      if msg:
6199
        _ShutdownInstanceDisks(self, instance)
6200
        raise errors.OpExecError("Could not start instance for"
6201
                                 " full reboot: %s" % msg)
6202

    
6203
    self.cfg.MarkInstanceUp(instance.name)
6204

    
6205

    
6206
class LUInstanceShutdown(LogicalUnit):
6207
  """Shutdown an instance.
6208

6209
  """
6210
  HPATH = "instance-stop"
6211
  HTYPE = constants.HTYPE_INSTANCE
6212
  REQ_BGL = False
6213

    
6214
  def ExpandNames(self):
6215
    self._ExpandAndLockInstance()
6216

    
6217
  def BuildHooksEnv(self):
6218
    """Build hooks env.
6219

6220
    This runs on master, primary and secondary nodes of the instance.
6221

6222
    """
6223
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6224
    env["TIMEOUT"] = self.op.timeout
6225
    return env
6226

    
6227
  def BuildHooksNodes(self):
6228
    """Build hooks nodes.
6229

6230
    """
6231
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6232
    return (nl, nl)
6233

    
6234
  def CheckPrereq(self):
6235
    """Check prerequisites.
6236

6237
    This checks that the instance is in the cluster.
6238

6239
    """
6240
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6241
    assert self.instance is not None, \
6242
      "Cannot retrieve locked instance %s" % self.op.instance_name
6243

    
6244
    self.primary_offline = \
6245
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6246

    
6247
    if self.primary_offline and self.op.ignore_offline_nodes:
6248
      self.proc.LogWarning("Ignoring offline primary node")
6249
    else:
6250
      _CheckNodeOnline(self, self.instance.primary_node)
6251

    
6252
  def Exec(self, feedback_fn):
6253
    """Shutdown the instance.
6254

6255
    """
6256
    instance = self.instance
6257
    node_current = instance.primary_node
6258
    timeout = self.op.timeout
6259

    
6260
    if not self.op.no_remember:
6261
      self.cfg.MarkInstanceDown(instance.name)
6262

    
6263
    if self.primary_offline:
6264
      assert self.op.ignore_offline_nodes
6265
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6266
    else:
6267
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6268
      msg = result.fail_msg
6269
      if msg:
6270
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6271

    
6272
      _ShutdownInstanceDisks(self, instance)
6273

    
6274

    
6275
class LUInstanceReinstall(LogicalUnit):
6276
  """Reinstall an instance.
6277

6278
  """
6279
  HPATH = "instance-reinstall"
6280
  HTYPE = constants.HTYPE_INSTANCE
6281
  REQ_BGL = False
6282

    
6283
  def ExpandNames(self):
6284
    self._ExpandAndLockInstance()
6285

    
6286
  def BuildHooksEnv(self):
6287
    """Build hooks env.
6288

6289
    This runs on master, primary and secondary nodes of the instance.
6290

6291
    """
6292
    return _BuildInstanceHookEnvByObject(self, self.instance)
6293

    
6294
  def BuildHooksNodes(self):
6295
    """Build hooks nodes.
6296

6297
    """
6298
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6299
    return (nl, nl)
6300

    
6301
  def CheckPrereq(self):
6302
    """Check prerequisites.
6303

6304
    This checks that the instance is in the cluster and is not running.
6305

6306
    """
6307
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6308
    assert instance is not None, \
6309
      "Cannot retrieve locked instance %s" % self.op.instance_name
6310
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6311
                     " offline, cannot reinstall")
6312
    for node in instance.secondary_nodes:
6313
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6314
                       " cannot reinstall")
6315

    
6316
    if instance.disk_template == constants.DT_DISKLESS:
6317
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6318
                                 self.op.instance_name,
6319
                                 errors.ECODE_INVAL)
6320
    _CheckInstanceDown(self, instance, "cannot reinstall")
6321

    
6322
    if self.op.os_type is not None:
6323
      # OS verification
6324
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6325
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6326
      instance_os = self.op.os_type
6327
    else:
6328
      instance_os = instance.os
6329

    
6330
    nodelist = list(instance.all_nodes)
6331

    
6332
    if self.op.osparams:
6333
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6334
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6335
      self.os_inst = i_osdict # the new dict (without defaults)
6336
    else:
6337
      self.os_inst = None
6338

    
6339
    self.instance = instance
6340

    
6341
  def Exec(self, feedback_fn):
6342
    """Reinstall the instance.
6343

6344
    """
6345
    inst = self.instance
6346

    
6347
    if self.op.os_type is not None:
6348
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6349
      inst.os = self.op.os_type
6350
      # Write to configuration
6351
      self.cfg.Update(inst, feedback_fn)
6352

    
6353
    _StartInstanceDisks(self, inst, None)
6354
    try:
6355
      feedback_fn("Running the instance OS create scripts...")
6356
      # FIXME: pass debug option from opcode to backend
6357
      result = self.rpc.call_instance_os_add(inst.primary_node,
6358
                                             (inst, self.os_inst), True,
6359
                                             self.op.debug_level)
6360
      result.Raise("Could not install OS for instance %s on node %s" %
6361
                   (inst.name, inst.primary_node))
6362
    finally:
6363
      _ShutdownInstanceDisks(self, inst)
6364

    
6365

    
6366
class LUInstanceRecreateDisks(LogicalUnit):
6367
  """Recreate an instance's missing disks.
6368

6369
  """
6370
  HPATH = "instance-recreate-disks"
6371
  HTYPE = constants.HTYPE_INSTANCE
6372
  REQ_BGL = False
6373

    
6374
  def CheckArguments(self):
6375
    # normalise the disk list
6376
    self.op.disks = sorted(frozenset(self.op.disks))
6377

    
6378
  def ExpandNames(self):
6379
    self._ExpandAndLockInstance()
6380
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6381
    if self.op.nodes:
6382
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6383
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6384
    else:
6385
      self.needed_locks[locking.LEVEL_NODE] = []
6386

    
6387
  def DeclareLocks(self, level):
6388
    if level == locking.LEVEL_NODE:
6389
      # if we replace the nodes, we only need to lock the old primary,
6390
      # otherwise we need to lock all nodes for disk re-creation
6391
      primary_only = bool(self.op.nodes)
6392
      self._LockInstancesNodes(primary_only=primary_only)
6393

    
6394
  def BuildHooksEnv(self):
6395
    """Build hooks env.
6396

6397
    This runs on master, primary and secondary nodes of the instance.
6398

6399
    """
6400
    return _BuildInstanceHookEnvByObject(self, self.instance)
6401

    
6402
  def BuildHooksNodes(self):
6403
    """Build hooks nodes.
6404

6405
    """
6406
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6407
    return (nl, nl)
6408

    
6409
  def CheckPrereq(self):
6410
    """Check prerequisites.
6411

6412
    This checks that the instance is in the cluster and is not running.
6413

6414
    """
6415
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6416
    assert instance is not None, \
6417
      "Cannot retrieve locked instance %s" % self.op.instance_name
6418
    if self.op.nodes:
6419
      if len(self.op.nodes) != len(instance.all_nodes):
6420
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6421
                                   " %d replacement nodes were specified" %
6422
                                   (instance.name, len(instance.all_nodes),
6423
                                    len(self.op.nodes)),
6424
                                   errors.ECODE_INVAL)
6425
      assert instance.disk_template != constants.DT_DRBD8 or \
6426
          len(self.op.nodes) == 2
6427
      assert instance.disk_template != constants.DT_PLAIN or \
6428
          len(self.op.nodes) == 1
6429
      primary_node = self.op.nodes[0]
6430
    else:
6431
      primary_node = instance.primary_node
6432
    _CheckNodeOnline(self, primary_node)
6433

    
6434
    if instance.disk_template == constants.DT_DISKLESS:
6435
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6436
                                 self.op.instance_name, errors.ECODE_INVAL)
6437
    # if we replace nodes *and* the old primary is offline, we don't
6438
    # check
6439
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6440
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6441
    if not (self.op.nodes and old_pnode.offline):
6442
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6443

    
6444
    if not self.op.disks:
6445
      self.op.disks = range(len(instance.disks))
6446
    else:
6447
      for idx in self.op.disks:
6448
        if idx >= len(instance.disks):
6449
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6450
                                     errors.ECODE_INVAL)
6451
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6452
      raise errors.OpPrereqError("Can't recreate disks partially and"
6453
                                 " change the nodes at the same time",
6454
                                 errors.ECODE_INVAL)
6455
    self.instance = instance
6456

    
6457
  def Exec(self, feedback_fn):
6458
    """Recreate the disks.
6459

6460
    """
6461
    instance = self.instance
6462

    
6463
    to_skip = []
6464
    mods = [] # keeps track of needed logical_id changes
6465

    
6466
    for idx, disk in enumerate(instance.disks):
6467
      if idx not in self.op.disks: # disk idx has not been passed in
6468
        to_skip.append(idx)
6469
        continue
6470
      # update secondaries for disks, if needed
6471
      if self.op.nodes:
6472
        if disk.dev_type == constants.LD_DRBD8:
6473
          # need to update the nodes and minors
6474
          assert len(self.op.nodes) == 2
6475
          assert len(disk.logical_id) == 6 # otherwise disk internals
6476
                                           # have changed
6477
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6478
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6479
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6480
                    new_minors[0], new_minors[1], old_secret)
6481
          assert len(disk.logical_id) == len(new_id)
6482
          mods.append((idx, new_id))
6483

    
6484
    # now that we have passed all asserts above, we can apply the mods
6485
    # in a single run (to avoid partial changes)
6486
    for idx, new_id in mods:
6487
      instance.disks[idx].logical_id = new_id
6488

    
6489
    # change primary node, if needed
6490
    if self.op.nodes:
6491
      instance.primary_node = self.op.nodes[0]
6492
      self.LogWarning("Changing the instance's nodes, you will have to"
6493
                      " remove any disks left on the older nodes manually")
6494

    
6495
    if self.op.nodes:
6496
      self.cfg.Update(instance, feedback_fn)
6497

    
6498
    _CreateDisks(self, instance, to_skip=to_skip)
6499

    
6500

    
6501
class LUInstanceRename(LogicalUnit):
6502
  """Rename an instance.
6503

6504
  """
6505
  HPATH = "instance-rename"
6506
  HTYPE = constants.HTYPE_INSTANCE
6507

    
6508
  def CheckArguments(self):
6509
    """Check arguments.
6510

6511
    """
6512
    if self.op.ip_check and not self.op.name_check:
6513
      # TODO: make the ip check more flexible and not depend on the name check
6514
      raise errors.OpPrereqError("IP address check requires a name check",
6515
                                 errors.ECODE_INVAL)
6516

    
6517
  def BuildHooksEnv(self):
6518
    """Build hooks env.
6519

6520
    This runs on master, primary and secondary nodes of the instance.
6521

6522
    """
6523
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6524
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6525
    return env
6526

    
6527
  def BuildHooksNodes(self):
6528
    """Build hooks nodes.
6529

6530
    """
6531
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6532
    return (nl, nl)
6533

    
6534
  def CheckPrereq(self):
6535
    """Check prerequisites.
6536

6537
    This checks that the instance is in the cluster and is not running.
6538

6539
    """
6540
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6541
                                                self.op.instance_name)
6542
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6543
    assert instance is not None
6544
    _CheckNodeOnline(self, instance.primary_node)
6545
    _CheckInstanceDown(self, instance, "cannot rename")
6546
    self.instance = instance
6547

    
6548
    new_name = self.op.new_name
6549
    if self.op.name_check:
6550
      hostname = netutils.GetHostname(name=new_name)
6551
      if hostname != new_name:
6552
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6553
                     hostname.name)
6554
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6555
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6556
                                    " same as given hostname '%s'") %
6557
                                    (hostname.name, self.op.new_name),
6558
                                    errors.ECODE_INVAL)
6559
      new_name = self.op.new_name = hostname.name
6560
      if (self.op.ip_check and
6561
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6562
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6563
                                   (hostname.ip, new_name),
6564
                                   errors.ECODE_NOTUNIQUE)
6565

    
6566
    instance_list = self.cfg.GetInstanceList()
6567
    if new_name in instance_list and new_name != instance.name:
6568
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6569
                                 new_name, errors.ECODE_EXISTS)
6570

    
6571
  def Exec(self, feedback_fn):
6572
    """Rename the instance.
6573

6574
    """
6575
    inst = self.instance
6576
    old_name = inst.name
6577

    
6578
    rename_file_storage = False
6579
    if (inst.disk_template in constants.DTS_FILEBASED and
6580
        self.op.new_name != inst.name):
6581
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6582
      rename_file_storage = True
6583

    
6584
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6585
    # Change the instance lock. This is definitely safe while we hold the BGL.
6586
    # Otherwise the new lock would have to be added in acquired mode.
6587
    assert self.REQ_BGL
6588
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6589
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6590

    
6591
    # re-read the instance from the configuration after rename
6592
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6593

    
6594
    if rename_file_storage:
6595
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6596
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6597
                                                     old_file_storage_dir,
6598
                                                     new_file_storage_dir)
6599
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6600
                   " (but the instance has been renamed in Ganeti)" %
6601
                   (inst.primary_node, old_file_storage_dir,
6602
                    new_file_storage_dir))
6603

    
6604
    _StartInstanceDisks(self, inst, None)
6605
    try:
6606
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6607
                                                 old_name, self.op.debug_level)
6608
      msg = result.fail_msg
6609
      if msg:
6610
        msg = ("Could not run OS rename script for instance %s on node %s"
6611
               " (but the instance has been renamed in Ganeti): %s" %
6612
               (inst.name, inst.primary_node, msg))
6613
        self.proc.LogWarning(msg)
6614
    finally:
6615
      _ShutdownInstanceDisks(self, inst)
6616

    
6617
    return inst.name
6618

    
6619

    
6620
class LUInstanceRemove(LogicalUnit):
6621
  """Remove an instance.
6622

6623
  """
6624
  HPATH = "instance-remove"
6625
  HTYPE = constants.HTYPE_INSTANCE
6626
  REQ_BGL = False
6627

    
6628
  def ExpandNames(self):
6629
    self._ExpandAndLockInstance()
6630
    self.needed_locks[locking.LEVEL_NODE] = []
6631
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6632

    
6633
  def DeclareLocks(self, level):
6634
    if level == locking.LEVEL_NODE:
6635
      self._LockInstancesNodes()
6636

    
6637
  def BuildHooksEnv(self):
6638
    """Build hooks env.
6639

6640
    This runs on master, primary and secondary nodes of the instance.
6641

6642
    """
6643
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6644
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6645
    return env
6646

    
6647
  def BuildHooksNodes(self):
6648
    """Build hooks nodes.
6649

6650
    """
6651
    nl = [self.cfg.GetMasterNode()]
6652
    nl_post = list(self.instance.all_nodes) + nl
6653
    return (nl, nl_post)
6654

    
6655
  def CheckPrereq(self):
6656
    """Check prerequisites.
6657

6658
    This checks that the instance is in the cluster.
6659

6660
    """
6661
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6662
    assert self.instance is not None, \
6663
      "Cannot retrieve locked instance %s" % self.op.instance_name
6664

    
6665
  def Exec(self, feedback_fn):
6666
    """Remove the instance.
6667

6668
    """
6669
    instance = self.instance
6670
    logging.info("Shutting down instance %s on node %s",
6671
                 instance.name, instance.primary_node)
6672

    
6673
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6674
                                             self.op.shutdown_timeout)
6675
    msg = result.fail_msg
6676
    if msg:
6677
      if self.op.ignore_failures:
6678
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6679
      else:
6680
        raise errors.OpExecError("Could not shutdown instance %s on"
6681
                                 " node %s: %s" %
6682
                                 (instance.name, instance.primary_node, msg))
6683

    
6684
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6685

    
6686

    
6687
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6688
  """Utility function to remove an instance.
6689

6690
  """
6691
  logging.info("Removing block devices for instance %s", instance.name)
6692

    
6693
  if not _RemoveDisks(lu, instance):
6694
    if not ignore_failures:
6695
      raise errors.OpExecError("Can't remove instance's disks")
6696
    feedback_fn("Warning: can't remove instance's disks")
6697

    
6698
  logging.info("Removing instance %s out of cluster config", instance.name)
6699

    
6700
  lu.cfg.RemoveInstance(instance.name)
6701

    
6702
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6703
    "Instance lock removal conflict"
6704

    
6705
  # Remove lock for the instance
6706
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6707

    
6708

    
6709
class LUInstanceQuery(NoHooksLU):
6710
  """Logical unit for querying instances.
6711

6712
  """
6713
  # pylint: disable=W0142
6714
  REQ_BGL = False
6715

    
6716
  def CheckArguments(self):
6717
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6718
                             self.op.output_fields, self.op.use_locking)
6719

    
6720
  def ExpandNames(self):
6721
    self.iq.ExpandNames(self)
6722

    
6723
  def DeclareLocks(self, level):
6724
    self.iq.DeclareLocks(self, level)
6725

    
6726
  def Exec(self, feedback_fn):
6727
    return self.iq.OldStyleQuery(self)
6728

    
6729

    
6730
class LUInstanceFailover(LogicalUnit):
6731
  """Failover an instance.
6732

6733
  """
6734
  HPATH = "instance-failover"
6735
  HTYPE = constants.HTYPE_INSTANCE
6736
  REQ_BGL = False
6737

    
6738
  def CheckArguments(self):
6739
    """Check the arguments.
6740

6741
    """
6742
    self.iallocator = getattr(self.op, "iallocator", None)
6743
    self.target_node = getattr(self.op, "target_node", None)
6744

    
6745
  def ExpandNames(self):
6746
    self._ExpandAndLockInstance()
6747

    
6748
    if self.op.target_node is not None:
6749
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6750

    
6751
    self.needed_locks[locking.LEVEL_NODE] = []
6752
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6753

    
6754
    ignore_consistency = self.op.ignore_consistency
6755
    shutdown_timeout = self.op.shutdown_timeout
6756
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6757
                                       cleanup=False,
6758
                                       failover=True,
6759
                                       ignore_consistency=ignore_consistency,
6760
                                       shutdown_timeout=shutdown_timeout)
6761
    self.tasklets = [self._migrater]
6762

    
6763
  def DeclareLocks(self, level):
6764
    if level == locking.LEVEL_NODE:
6765
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6766
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6767
        if self.op.target_node is None:
6768
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6769
        else:
6770
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6771
                                                   self.op.target_node]
6772
        del self.recalculate_locks[locking.LEVEL_NODE]
6773
      else:
6774
        self._LockInstancesNodes()
6775

    
6776
  def BuildHooksEnv(self):
6777
    """Build hooks env.
6778

6779
    This runs on master, primary and secondary nodes of the instance.
6780

6781
    """
6782
    instance = self._migrater.instance
6783
    source_node = instance.primary_node
6784
    target_node = self.op.target_node
6785
    env = {
6786
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6787
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6788
      "OLD_PRIMARY": source_node,
6789
      "NEW_PRIMARY": target_node,
6790
      }
6791

    
6792
    if instance.disk_template in constants.DTS_INT_MIRROR:
6793
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6794
      env["NEW_SECONDARY"] = source_node
6795
    else:
6796
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6797

    
6798
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6799

    
6800
    return env
6801

    
6802
  def BuildHooksNodes(self):
6803
    """Build hooks nodes.
6804

6805
    """
6806
    instance = self._migrater.instance
6807
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6808
    return (nl, nl + [instance.primary_node])
6809

    
6810

    
6811
class LUInstanceMigrate(LogicalUnit):
6812
  """Migrate an instance.
6813

6814
  This is migration without shutting down, compared to the failover,
6815
  which is done with shutdown.
6816

6817
  """
6818
  HPATH = "instance-migrate"
6819
  HTYPE = constants.HTYPE_INSTANCE
6820
  REQ_BGL = False
6821

    
6822
  def ExpandNames(self):
6823
    self._ExpandAndLockInstance()
6824

    
6825
    if self.op.target_node is not None:
6826
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6827

    
6828
    self.needed_locks[locking.LEVEL_NODE] = []
6829
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6830

    
6831
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6832
                                       cleanup=self.op.cleanup,
6833
                                       failover=False,
6834
                                       fallback=self.op.allow_failover)
6835
    self.tasklets = [self._migrater]
6836

    
6837
  def DeclareLocks(self, level):
6838
    if level == locking.LEVEL_NODE:
6839
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6840
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6841
        if self.op.target_node is None:
6842
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6843
        else:
6844
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6845
                                                   self.op.target_node]
6846
        del self.recalculate_locks[locking.LEVEL_NODE]
6847
      else:
6848
        self._LockInstancesNodes()
6849

    
6850
  def BuildHooksEnv(self):
6851
    """Build hooks env.
6852

6853
    This runs on master, primary and secondary nodes of the instance.
6854

6855
    """
6856
    instance = self._migrater.instance
6857
    source_node = instance.primary_node
6858
    target_node = self.op.target_node
6859
    env = _BuildInstanceHookEnvByObject(self, instance)
6860
    env.update({
6861
      "MIGRATE_LIVE": self._migrater.live,
6862
      "MIGRATE_CLEANUP": self.op.cleanup,
6863
      "OLD_PRIMARY": source_node,
6864
      "NEW_PRIMARY": target_node,
6865
      })
6866

    
6867
    if instance.disk_template in constants.DTS_INT_MIRROR:
6868
      env["OLD_SECONDARY"] = target_node
6869
      env["NEW_SECONDARY"] = source_node
6870
    else:
6871
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6872

    
6873
    return env
6874

    
6875
  def BuildHooksNodes(self):
6876
    """Build hooks nodes.
6877

6878
    """
6879
    instance = self._migrater.instance
6880
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6881
    return (nl, nl + [instance.primary_node])
6882

    
6883

    
6884
class LUInstanceMove(LogicalUnit):
6885
  """Move an instance by data-copying.
6886

6887
  """
6888
  HPATH = "instance-move"
6889
  HTYPE = constants.HTYPE_INSTANCE
6890
  REQ_BGL = False
6891

    
6892
  def ExpandNames(self):
6893
    self._ExpandAndLockInstance()
6894
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6895
    self.op.target_node = target_node
6896
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6897
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6898

    
6899
  def DeclareLocks(self, level):
6900
    if level == locking.LEVEL_NODE:
6901
      self._LockInstancesNodes(primary_only=True)
6902

    
6903
  def BuildHooksEnv(self):
6904
    """Build hooks env.
6905

6906
    This runs on master, primary and secondary nodes of the instance.
6907

6908
    """
6909
    env = {
6910
      "TARGET_NODE": self.op.target_node,
6911
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6912
      }
6913
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6914
    return env
6915

    
6916
  def BuildHooksNodes(self):
6917
    """Build hooks nodes.
6918

6919
    """
6920
    nl = [
6921
      self.cfg.GetMasterNode(),
6922
      self.instance.primary_node,
6923
      self.op.target_node,
6924
      ]
6925
    return (nl, nl)
6926

    
6927
  def CheckPrereq(self):
6928
    """Check prerequisites.
6929

6930
    This checks that the instance is in the cluster.
6931

6932
    """
6933
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6934
    assert self.instance is not None, \
6935
      "Cannot retrieve locked instance %s" % self.op.instance_name
6936

    
6937
    node = self.cfg.GetNodeInfo(self.op.target_node)
6938
    assert node is not None, \
6939
      "Cannot retrieve locked node %s" % self.op.target_node
6940

    
6941
    self.target_node = target_node = node.name
6942

    
6943
    if target_node == instance.primary_node:
6944
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6945
                                 (instance.name, target_node),
6946
                                 errors.ECODE_STATE)
6947

    
6948
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6949

    
6950
    for idx, dsk in enumerate(instance.disks):
6951
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6952
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6953
                                   " cannot copy" % idx, errors.ECODE_STATE)
6954

    
6955
    _CheckNodeOnline(self, target_node)
6956
    _CheckNodeNotDrained(self, target_node)
6957
    _CheckNodeVmCapable(self, target_node)
6958

    
6959
    if instance.admin_up:
6960
      # check memory requirements on the secondary node
6961
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6962
                           instance.name, bep[constants.BE_MEMORY],
6963
                           instance.hypervisor)
6964
    else:
6965
      self.LogInfo("Not checking memory on the secondary node as"
6966
                   " instance will not be started")
6967

    
6968
    # check bridge existance
6969
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6970

    
6971
  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


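# The disk-copy loop above follows an accumulate-errors-then-roll-back
# pattern: every per-disk RPC failure is recorded, the loop stops, and the
# partially created target disks are removed before the operation aborts.
# A minimal standalone sketch of that pattern (copy_fn and cleanup_fn are
# hypothetical stand-ins for the blockdev assemble/export RPCs and the
# _RemoveDisks call, not Ganeti APIs):
#
#   def _CopyAllOrRollback(disks, copy_fn, cleanup_fn):
#     errs = []
#     for idx, disk in enumerate(disks):
#       err = copy_fn(idx, disk)  # returns None on success, message on error
#       if err:
#         errs.append(err)
#         break
#     if errs:
#       try:
#         cleanup_fn()
#       finally:
#         raise errors.OpExecError("Errors during disk copy: %s" %
#                                  ",".join(errs))
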
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare one migration job per primary instance on the node
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
      ]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)


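# LUNodeMigrate.Exec returns one single-opcode job per primary instance, so
# each migration is submitted and scheduled independently. Illustrative shape
# of the returned value for a node with two primary instances (the instance
# names are hypothetical):
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1", mode=None, live=None,
#                                iallocator=None, target_node=None)],
#     [opcodes.OpInstanceMigrate(instance_name="inst2", mode=None, live=None,
#                                iallocator=None, target_node=None)],
#     ]
#   return ResultWithJobs(jobs)
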
class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we are cleaning up after a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout to use for the
                          shutdown

  """

  # Constants
  _MIGRATION_POLL_INTERVAL = 1      # seconds
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds

  def __init__(self, lu, instance_name, cleanup=False,
               failover=False, fallback=False,
               ignore_consistency=False,
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance

    if (not self.cleanup and not instance.admin_up and not self.failover and
        self.fallback):
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
                      " to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node))

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])

    else:
      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    # check memory requirements on the secondary node
    if not self.failover or instance.admin_up:
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                           instance.name, i_be[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters is accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
                                                skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False

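  # Illustrative summary of how the 'live'/'mode' opcode parameters above
  # resolve into self.live (a sketch; it assumes the usual constants, with
  # HT_MIGRATION_LIVE meaning a live and HT_MIGRATION_NONLIVE a non-live
  # migration):
  #
  #   live=True,  mode=None   -> mode := HT_MIGRATION_LIVE,    self.live=True
  #   live=False, mode=None   -> mode := HT_MIGRATION_NONLIVE, self.live=False
  #   live=None,  mode=<set>  -> mode used as given
  #   live=None,  mode=None   -> hypervisor default (HV_MIGRATION_MODE)
  #   live and mode both set  -> OpPrereqError
  #   failover requested      -> self.live is always False
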
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.instance_name,
                     # TODO See why hail breaks with a single node below
                     relocate_from=[self.instance.primary_node,
                                    self.instance.primary_node],
                     )

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.lu.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                    self.instance_name, self.lu.op.iallocator,
                    utils.CommaJoin(ial.result))

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

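  # Worked example for the per-node aggregation in _WaitUntilSync, using
  # hypothetical (done, sync_percent) payloads of the form returned above:
  #
  #   payloads = {"node1.example.com": (False, 80.5),
  #               "node2.example.com": (True, None)}
  #   all_done = all(done for (done, _) in payloads.values())       # False
  #   percents = [p for (_, p) in payloads.values() if p is not None]
  #   min_percent = min([100] + percents)                           # 80.5
  #
  # so the loop reports "   - progress: 80.5%" and sleeps two seconds before
  # polling again; it terminates once every node reports done.
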
  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks on node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to clean up after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore errors here, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                                 instance,
                                                                 migration_info,
                                                                 False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
        instance, False, self.live)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on source node %s: %s",
                    source_node, abort_msg)

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, self.instance.hypervisor)
    src_info = nodeinfo[source_node]
    dst_info = nodeinfo[target_node]

    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()
    while True:
      result = self.rpc.call_instance_get_migration_status(source_node,
                                                           instance)
      msg = result.fail_msg
      ms = result.payload   # MigrationStatus instance
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (instance.name, msg))

      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
        self.feedback_fn("* memory transfer complete")
        break

      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None):
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    result = self.rpc.call_instance_finalize_migration_src(source_node,
                                                           instance,
                                                           True,
                                                           self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    instance.primary_node = target_node

    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                           instance,
                                                           migration_info,
                                                           True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

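  # Worked example for the progress report in _ExecMigration (the numbers are
  # hypothetical): with ms.transferred_ram == 1536 and ms.total_ram == 2048,
  # the loop computes
  #
  #   mem_progress = 100 * float(1536) / float(2048)   # 75.00
  #
  # and emits "* memory transfer progress: 75.00 %", but only when at least
  # _MIGRATION_FEEDBACK_INTERVAL (10 s) has passed since the last report;
  # between reports it polls every _MIGRATION_POLL_INTERVAL (1 s).
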
  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_up:
      self.feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, dev.iv_name, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % dev.iv_name)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
                                            False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device for which
      CreateOnSecondary() returns True
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


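# Illustrative walk of _CreateBlockDev over a DRBD8 disk on its secondary
# node (a sketch; the node names, sizes and LV names are hypothetical, and
# "instance"/"info" stand for the values used by the caller). The DRBD8
# device reports CreateOnSecondary(), so force_create flips to True before
# recursing, which means the two LV children are created first and the DRBD8
# device last:
#
#   drbd = _GenerateDRBD8Branch(lu, "node1.example.com", "node2.example.com",
#                               10240, ["xenvg", "xenvg"],
#                               ["uuid.disk0_data", "uuid.disk0_meta"],
#                               "disk/0", 0, 0)
#   _CreateBlockDev(lu, "node2.example.com", instance, drbd,
#                   False, info, False)
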
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


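# The object tree built by _GenerateDRBD8Branch for one 10 GiB disk looks
# roughly like this (an illustrative sketch; the volume group, LV names and
# minors are hypothetical, sizes are in MiB):
#
#   LD_DRBD8  size=10240  iv_name="disk/0"
#     logical_id=(primary, secondary, port, p_minor, s_minor, shared_secret)
#     children:
#       LD_LV  size=10240           logical_id=("xenvg", "uuid.disk0_data")
#       LD_LV  size=DRBD_META_SIZE  logical_id=("xenvg", "uuid.disk0_meta")
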
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  """
  # TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get(constants.IDISK_VG, vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk[constants.IDISK_ADOPT]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


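# Worked example of the DRBD8 indexing in _GenerateDiskTemplate for a
# two-disk instance (name prefixes shortened and minors hypothetical): the
# generated names expand to ["p0_data", "p0_meta", "p1_data", "p1_meta"] and
# AllocateDRBDMinor returns one minor per node per disk, e.g. [0, 0, 1, 1],
# so each disk index selects a consecutive pair from both lists:
#
#   idx = 1
#   names[idx * 2:idx * 2 + 2]               # ["p1_data", "p1_meta"]
#   (minors[idx * 2], minors[idx * 2 + 1])   # (1, 1) -> p_minor, s_minor
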
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time


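# Worked example for _CalcEta (hypothetical numbers): after 120 seconds,
# 2048 MiB of a 10240 MiB disk have been written, so
#
#   avg_time = 120 / float(2048)        # ~0.0586 seconds per MiB
#   eta = (10240 - 2048) * avg_time     # 480 seconds remaining
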
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disks %d failed",
                     instance.name, idx)


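# Worked example for the chunk-size computation in _WipeDisks, assuming the
# usual values constants.MIN_WIPE_CHUNK_PERCENT = 10 and
# constants.MAX_WIPE_CHUNK = 1024 (MiB); the 51200 MiB disk is hypothetical:
#
#   wipe_chunk_size = min(1024, 51200 / 100.0 * 10)   # min(1024, 5120.0)
#   wipe_chunk_size = int(wipe_chunk_size)            # 1024 MiB per RPC call
#
# so a 50 GiB disk would be wiped in 50 chunks of 1 GiB each.
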
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vg = disk[constants.IDISK_VG]
      vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB of DRBD metadata is added for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


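# Worked example for _ComputeDiskSizePerVG (hypothetical disk specs, sizes in
# MiB): for two disks in volume group "xenvg" and one in "fastvg",
#
#   disks = [{constants.IDISK_VG: "xenvg",  constants.IDISK_SIZE: 1024},
#            {constants.IDISK_VG: "xenvg",  constants.IDISK_SIZE: 2048},
#            {constants.IDISK_VG: "fastvg", constants.IDISK_SIZE: 512}]
#   _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)
#   # -> {"xenvg": 1024 + 2048 + 2 * DRBD_META_SIZE,
#   #     "fastvg": 512 + DRBD_META_SIZE}
#   # i.e. {"xenvg": 3328, "fastvg": 640} with DRBD_META_SIZE = 128
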
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB of DRBD metadata is added for each disk
    constants.DT_DRBD8:
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


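# Worked example for _ComputeDiskSize with hypothetical disk sizes (in MiB):
#
#   disks = [{constants.IDISK_SIZE: 1024}, {constants.IDISK_SIZE: 2048}]
#   _ComputeDiskSize(constants.DT_PLAIN, disks)     # 3072
#   _ComputeDiskSize(constants.DT_DRBD8, disks)     # 3072 + 2 * 128 = 3328
#   _ComputeDiskSize(constants.DT_DISKLESS, disks)  # None (no space needed)
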
def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks' parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        self.op.file_driver not in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")
8391

    
8392
    elif self.op.mode == constants.INSTANCE_CREATE:
8393
      if self.op.os_type is None:
8394
        raise errors.OpPrereqError("No guest OS specified",
8395
                                   errors.ECODE_INVAL)
8396
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8397
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8398
                                   " installation" % self.op.os_type,
8399
                                   errors.ECODE_STATE)
8400
      if self.op.disk_template is None:
8401
        raise errors.OpPrereqError("No disk template specified",
8402
                                   errors.ECODE_INVAL)
8403

    
8404
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8405
      # Check handshake to ensure both clusters have the same domain secret
8406
      src_handshake = self.op.source_handshake
8407
      if not src_handshake:
8408
        raise errors.OpPrereqError("Missing source handshake",
8409
                                   errors.ECODE_INVAL)
8410

    
8411
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8412
                                                           src_handshake)
8413
      if errmsg:
8414
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8415
                                   errors.ECODE_INVAL)
8416

    
8417
      # Load and check source CA
8418
      self.source_x509_ca_pem = self.op.source_x509_ca
8419
      if not self.source_x509_ca_pem:
8420
        raise errors.OpPrereqError("Missing source X509 CA",
8421
                                   errors.ECODE_INVAL)
8422

    
8423
      try:
8424
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8425
                                                    self._cds)
8426
      except OpenSSL.crypto.Error, err:
8427
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8428
                                   (err, ), errors.ECODE_INVAL)
8429

    
8430
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8431
      if errcode is not None:
8432
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8433
                                   errors.ECODE_INVAL)
8434

    
8435
      self.source_x509_ca = cert
8436

    
8437
      src_instance_name = self.op.source_instance_name
8438
      if not src_instance_name:
8439
        raise errors.OpPrereqError("Missing source instance name",
8440
                                   errors.ECODE_INVAL)
8441

    
8442
      self.source_instance_name = \
8443
          netutils.GetHostname(name=src_instance_name).name
8444

    
8445
    else:
8446
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8447
                                 self.op.mode, errors.ECODE_INVAL)
8448
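
  # Illustrative note (not part of the original module): disk adoption is
  # all-or-nothing, so (with hypothetical volume names)
  #   [{constants.IDISK_ADOPT: "xenvg/vol1"},
  #    {constants.IDISK_ADOPT: "xenvg/vol2"}]
  # is accepted, while mixing an adopted disk with a plain
  # {constants.IDISK_SIZE: 1024} disk is rejected by the check above.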

    
8449
  def ExpandNames(self):
8450
    """ExpandNames for CreateInstance.
8451

8452
    Figure out the right locks for instance creation.
8453

8454
    """
8455
    self.needed_locks = {}
8456

    
8457
    instance_name = self.op.instance_name
8458
    # this is just a preventive check, but someone might still add this
8459
    # instance in the meantime, and creation will fail at lock-add time
8460
    if instance_name in self.cfg.GetInstanceList():
8461
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8462
                                 instance_name, errors.ECODE_EXISTS)
8463

    
8464
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8465

    
8466
    if self.op.iallocator:
8467
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8468
    else:
8469
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8470
      nodelist = [self.op.pnode]
8471
      if self.op.snode is not None:
8472
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8473
        nodelist.append(self.op.snode)
8474
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8475

    
8476
    # in case of import lock the source node too
8477
    if self.op.mode == constants.INSTANCE_IMPORT:
8478
      src_node = self.op.src_node
8479
      src_path = self.op.src_path
8480

    
8481
      if src_path is None:
8482
        self.op.src_path = src_path = self.op.instance_name
8483

    
8484
      if src_node is None:
8485
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8486
        self.op.src_node = None
8487
        if os.path.isabs(src_path):
8488
          raise errors.OpPrereqError("Importing an instance from a path"
8489
                                     " requires a source node option",
8490
                                     errors.ECODE_INVAL)
8491
      else:
8492
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8493
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8494
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8495
        if not os.path.isabs(src_path):
8496
          self.op.src_path = src_path = \
8497
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8498
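
  # Illustrative note (not part of the original module): with an iallocator
  # the node level is locked as ALL_SET (unused node locks are released
  # later, in Exec), while explicit nodes produce a list such as
  # (hypothetical names)
  #   self.needed_locks[locking.LEVEL_NODE] = ["node1.example.com",
  #                                            "node2.example.com"]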

    
8499
  def _RunAllocator(self):
8500
    """Run the allocator based on input opcode.
8501

8502
    """
8503
    nics = [n.ToDict() for n in self.nics]
8504
    ial = IAllocator(self.cfg, self.rpc,
8505
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8506
                     name=self.op.instance_name,
8507
                     disk_template=self.op.disk_template,
8508
                     tags=self.op.tags,
8509
                     os=self.op.os_type,
8510
                     vcpus=self.be_full[constants.BE_VCPUS],
8511
                     memory=self.be_full[constants.BE_MEMORY],
8512
                     disks=self.disks,
8513
                     nics=nics,
8514
                     hypervisor=self.op.hypervisor,
8515
                     )
8516

    
8517
    ial.Run(self.op.iallocator)
8518

    
8519
    if not ial.success:
8520
      raise errors.OpPrereqError("Can't compute nodes using"
8521
                                 " iallocator '%s': %s" %
8522
                                 (self.op.iallocator, ial.info),
8523
                                 errors.ECODE_NORES)
8524
    if len(ial.result) != ial.required_nodes:
8525
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8526
                                 " of nodes (%s), required %s" %
8527
                                 (self.op.iallocator, len(ial.result),
8528
                                  ial.required_nodes), errors.ECODE_FAULT)
8529
    self.op.pnode = ial.result[0]
8530
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8531
                 self.op.instance_name, self.op.iallocator,
8532
                 utils.CommaJoin(ial.result))
8533
    if ial.required_nodes == 2:
8534
      self.op.snode = ial.result[1]
8535

    
8536
  def BuildHooksEnv(self):
8537
    """Build hooks env.
8538

8539
    This runs on master, primary and secondary nodes of the instance.
8540

8541
    """
8542
    env = {
8543
      "ADD_MODE": self.op.mode,
8544
      }
8545
    if self.op.mode == constants.INSTANCE_IMPORT:
8546
      env["SRC_NODE"] = self.op.src_node
8547
      env["SRC_PATH"] = self.op.src_path
8548
      env["SRC_IMAGES"] = self.src_images
8549

    
8550
    env.update(_BuildInstanceHookEnv(
8551
      name=self.op.instance_name,
8552
      primary_node=self.op.pnode,
8553
      secondary_nodes=self.secondaries,
8554
      status=self.op.start,
8555
      os_type=self.op.os_type,
8556
      memory=self.be_full[constants.BE_MEMORY],
8557
      vcpus=self.be_full[constants.BE_VCPUS],
8558
      nics=_NICListToTuple(self, self.nics),
8559
      disk_template=self.op.disk_template,
8560
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8561
             for d in self.disks],
8562
      bep=self.be_full,
8563
      hvp=self.hv_full,
8564
      hypervisor_name=self.op.hypervisor,
8565
      tags=self.op.tags,
8566
    ))
8567

    
8568
    return env
8569

    
8570
  def BuildHooksNodes(self):
8571
    """Build hooks nodes.
8572

8573
    """
8574
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8575
    return nl, nl
8576

    
8577
  def _ReadExportInfo(self):
8578
    """Reads the export information from disk.
8579

8580
    It will override the opcode source node and path with the actual
8581
    information, if these two were not specified before.
8582

8583
    @return: the export information
8584

8585
    """
8586
    assert self.op.mode == constants.INSTANCE_IMPORT
8587

    
8588
    src_node = self.op.src_node
8589
    src_path = self.op.src_path
8590

    
8591
    if src_node is None:
8592
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8593
      exp_list = self.rpc.call_export_list(locked_nodes)
8594
      found = False
8595
      for node in exp_list:
8596
        if exp_list[node].fail_msg:
8597
          continue
8598
        if src_path in exp_list[node].payload:
8599
          found = True
8600
          self.op.src_node = src_node = node
8601
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8602
                                                       src_path)
8603
          break
8604
      if not found:
8605
        raise errors.OpPrereqError("No export found for relative path %s" %
8606
                                    src_path, errors.ECODE_INVAL)
8607

    
8608
    _CheckNodeOnline(self, src_node)
8609
    result = self.rpc.call_export_info(src_node, src_path)
8610
    result.Raise("No export or invalid export found in dir %s" % src_path)
8611

    
8612
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8613
    if not export_info.has_section(constants.INISECT_EXP):
8614
      raise errors.ProgrammerError("Corrupted export config",
8615
                                   errors.ECODE_ENVIRON)
8616

    
8617
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8618
    if (int(ei_version) != constants.EXPORT_VERSION):
8619
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8620
                                 (ei_version, constants.EXPORT_VERSION),
8621
                                 errors.ECODE_ENVIRON)
8622
    return export_info
8623

    
8624
  def _ReadExportParams(self, einfo):
8625
    """Use export parameters as defaults.
8626

8627
    In case the opcode doesn't specify (as in override) some instance
8628
    parameters, then try to use them from the export information, if
8629
    that declares them.
8630

8631
    """
8632
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8633

    
8634
    if self.op.disk_template is None:
8635
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8636
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8637
                                          "disk_template")
8638
        if self.op.disk_template not in constants.DISK_TEMPLATES:
8639
          raise errors.OpPrereqError("Disk template specified in configuration"
8640
                                     " file is not one of the allowed values:"
8641
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
8642
      else:
8643
        raise errors.OpPrereqError("No disk template specified and the export"
8644
                                   " is missing the disk_template information",
8645
                                   errors.ECODE_INVAL)
8646

    
8647
    if not self.op.disks:
8648
      disks = []
8649
      # TODO: import the disk iv_name too
8650
      for idx in range(constants.MAX_DISKS):
8651
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8652
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8653
          disks.append({constants.IDISK_SIZE: disk_sz})
8654
      self.op.disks = disks
8655
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
8656
        raise errors.OpPrereqError("No disk info specified and the export"
8657
                                   " is missing the disk information",
8658
                                   errors.ECODE_INVAL)
8659

    
8660
    if not self.op.nics:
8661
      nics = []
8662
      for idx in range(constants.MAX_NICS):
8663
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8664
          ndict = {}
8665
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8666
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8667
            ndict[name] = v
8668
          nics.append(ndict)
8669
        else:
8670
          break
8671
      self.op.nics = nics
8672

    
8673
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8674
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8675

    
8676
    if (self.op.hypervisor is None and
8677
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8678
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8679

    
8680
    if einfo.has_section(constants.INISECT_HYP):
8681
      # use the export parameters but do not override the ones
8682
      # specified by the user
8683
      for name, value in einfo.items(constants.INISECT_HYP):
8684
        if name not in self.op.hvparams:
8685
          self.op.hvparams[name] = value
8686

    
8687
    if einfo.has_section(constants.INISECT_BEP):
8688
      # use the parameters, without overriding
8689
      for name, value in einfo.items(constants.INISECT_BEP):
8690
        if name not in self.op.beparams:
8691
          self.op.beparams[name] = value
8692
    else:
8693
      # try to read the parameters old style, from the main section
8694
      for name in constants.BES_PARAMETERS:
8695
        if (name not in self.op.beparams and
8696
            einfo.has_option(constants.INISECT_INS, name)):
8697
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8698

    
8699
    if einfo.has_section(constants.INISECT_OSP):
8700
      # use the parameters, without overriding
8701
      for name, value in einfo.items(constants.INISECT_OSP):
8702
        if name not in self.op.osparams:
8703
          self.op.osparams[name] = value
8704
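
  # Illustrative note (not part of the original module): the export file
  # read here might contain, in its instance section (hypothetical values),
  #   disk0_size = 10240
  #   nic0_mac = aa:00:00:dd:11:98
  #   hypervisor = xen-pvm
  # and any option missing from the opcode is filled in from such values.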

    
8705
  def _RevertToDefaults(self, cluster):
8706
    """Revert the instance parameters to the default values.
8707

8708
    """
8709
    # hvparams
8710
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8711
    for name in self.op.hvparams.keys():
8712
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8713
        del self.op.hvparams[name]
8714
    # beparams
8715
    be_defs = cluster.SimpleFillBE({})
8716
    for name in self.op.beparams.keys():
8717
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8718
        del self.op.beparams[name]
8719
    # nic params
8720
    nic_defs = cluster.SimpleFillNIC({})
8721
    for nic in self.op.nics:
8722
      for name in constants.NICS_PARAMETERS:
8723
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8724
          del nic[name]
8725
    # osparams
8726
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8727
    for name in self.op.osparams.keys():
8728
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8729
        del self.op.osparams[name]
8730
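
  # Illustrative note (not part of the original module): with
  # identify_defaults, a value equal to the current cluster default is
  # dropped again, e.g. a (hypothetical) beparams entry "vcpus": 1 is
  # removed when the cluster default is already 1, so the instance keeps
  # following future changes of that default.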

    
8731
  def _CalculateFileStorageDir(self):
8732
    """Calculate final instance file storage dir.
8733

8734
    """
8735
    # file storage dir calculation/check
8736
    self.instance_file_storage_dir = None
8737
    if self.op.disk_template in constants.DTS_FILEBASED:
8738
      # build the full file storage dir path
8739
      joinargs = []
8740

    
8741
      if self.op.disk_template == constants.DT_SHARED_FILE:
8742
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8743
      else:
8744
        get_fsd_fn = self.cfg.GetFileStorageDir
8745

    
8746
      cfg_storagedir = get_fsd_fn()
8747
      if not cfg_storagedir:
8748
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8749
      joinargs.append(cfg_storagedir)
8750

    
8751
      if self.op.file_storage_dir is not None:
8752
        joinargs.append(self.op.file_storage_dir)
8753

    
8754
      joinargs.append(self.op.instance_name)
8755

    
8756
      # pylint: disable=W0142
8757
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8758
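
  # Illustrative note (not part of the original module): for file-based
  # templates the directory computed above is, with hypothetical values,
  #   utils.PathJoin("/srv/ganeti/file-storage", "subdir",
  #                  "inst1.example.com")
  # i.e. the cluster storage dir, the optional per-instance dir and the
  # instance name.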

    
8759
  def CheckPrereq(self):
8760
    """Check prerequisites.
8761

8762
    """
8763
    self._CalculateFileStorageDir()
8764

    
8765
    if self.op.mode == constants.INSTANCE_IMPORT:
8766
      export_info = self._ReadExportInfo()
8767
      self._ReadExportParams(export_info)
8768

    
8769
    if (not self.cfg.GetVGName() and
8770
        self.op.disk_template not in constants.DTS_NOT_LVM):
8771
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8772
                                 " instances", errors.ECODE_STATE)
8773

    
8774
    if (self.op.hypervisor is None or
8775
        self.op.hypervisor == constants.VALUE_AUTO):
8776
      self.op.hypervisor = self.cfg.GetHypervisorType()
8777

    
8778
    cluster = self.cfg.GetClusterInfo()
8779
    enabled_hvs = cluster.enabled_hypervisors
8780
    if self.op.hypervisor not in enabled_hvs:
8781
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8782
                                 " cluster (%s)" % (self.op.hypervisor,
8783
                                  ",".join(enabled_hvs)),
8784
                                 errors.ECODE_STATE)
8785

    
8786
    # Check tag validity
8787
    for tag in self.op.tags:
8788
      objects.TaggableObject.ValidateTag(tag)
8789

    
8790
    # check hypervisor parameter syntax (locally)
8791
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8792
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8793
                                      self.op.hvparams)
8794
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8795
    hv_type.CheckParameterSyntax(filled_hvp)
8796
    self.hv_full = filled_hvp
8797
    # check that we don't specify global parameters on an instance
8798
    _CheckGlobalHvParams(self.op.hvparams)
8799

    
8800
    # fill and remember the beparams dict
8801
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
8802
    for param, value in self.op.beparams.iteritems():
8803
      if value == constants.VALUE_AUTO:
8804
        self.op.beparams[param] = default_beparams[param]
8805
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8806
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8807

    
8808
    # build os parameters
8809
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8810

    
8811
    # now that hvp/bep are in final format, let's reset to defaults,
8812
    # if told to do so
8813
    if self.op.identify_defaults:
8814
      self._RevertToDefaults(cluster)
8815

    
8816
    # NIC buildup
8817
    self.nics = []
8818
    for idx, nic in enumerate(self.op.nics):
8819
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8820
      nic_mode = nic_mode_req
8821
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8822
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8823

    
8824
      # in routed mode, for the first nic, the default ip is 'auto'
8825
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8826
        default_ip_mode = constants.VALUE_AUTO
8827
      else:
8828
        default_ip_mode = constants.VALUE_NONE
8829

    
8830
      # ip validity checks
8831
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8832
      if ip is None or ip.lower() == constants.VALUE_NONE:
8833
        nic_ip = None
8834
      elif ip.lower() == constants.VALUE_AUTO:
8835
        if not self.op.name_check:
8836
          raise errors.OpPrereqError("IP address set to auto but name checks"
8837
                                     " have been skipped",
8838
                                     errors.ECODE_INVAL)
8839
        nic_ip = self.hostname1.ip
8840
      else:
8841
        if not netutils.IPAddress.IsValid(ip):
8842
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8843
                                     errors.ECODE_INVAL)
8844
        nic_ip = ip
8845

    
8846
      # TODO: check the ip address for uniqueness
8847
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8848
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8849
                                   errors.ECODE_INVAL)
8850

    
8851
      # MAC address verification
8852
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8853
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8854
        mac = utils.NormalizeAndValidateMac(mac)
8855

    
8856
        try:
8857
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8858
        except errors.ReservationError:
8859
          raise errors.OpPrereqError("MAC address %s already in use"
8860
                                     " in cluster" % mac,
8861
                                     errors.ECODE_NOTUNIQUE)
8862

    
8863
      #  Build nic parameters
8864
      link = nic.get(constants.INIC_LINK, None)
8865
      if link == constants.VALUE_AUTO:
8866
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
8867
      nicparams = {}
8868
      if nic_mode_req:
8869
        nicparams[constants.NIC_MODE] = nic_mode
8870
      if link:
8871
        nicparams[constants.NIC_LINK] = link
8872

    
8873
      check_params = cluster.SimpleFillNIC(nicparams)
8874
      objects.NIC.CheckParameterSyntax(check_params)
8875
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8876

    
8877
    # disk checks/pre-build
8878
    default_vg = self.cfg.GetVGName()
8879
    self.disks = []
8880
    for disk in self.op.disks:
8881
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8882
      if mode not in constants.DISK_ACCESS_SET:
8883
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8884
                                   mode, errors.ECODE_INVAL)
8885
      size = disk.get(constants.IDISK_SIZE, None)
8886
      if size is None:
8887
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8888
      try:
8889
        size = int(size)
8890
      except (TypeError, ValueError):
8891
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8892
                                   errors.ECODE_INVAL)
8893

    
8894
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8895
      new_disk = {
8896
        constants.IDISK_SIZE: size,
8897
        constants.IDISK_MODE: mode,
8898
        constants.IDISK_VG: data_vg,
8899
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8900
        }
8901
      if constants.IDISK_ADOPT in disk:
8902
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8903
      self.disks.append(new_disk)
8904

    
8905
    if self.op.mode == constants.INSTANCE_IMPORT:
8906
      disk_images = []
8907
      for idx in range(len(self.disks)):
8908
        option = "disk%d_dump" % idx
8909
        if export_info.has_option(constants.INISECT_INS, option):
8910
          # FIXME: are the old OSes, disk sizes, etc. useful?
8911
          export_name = export_info.get(constants.INISECT_INS, option)
8912
          image = utils.PathJoin(self.op.src_path, export_name)
8913
          disk_images.append(image)
8914
        else:
8915
          disk_images.append(False)
8916

    
8917
      self.src_images = disk_images
8918

    
8919
      old_name = export_info.get(constants.INISECT_INS, "name")
8920
      if self.op.instance_name == old_name:
8921
        for idx, nic in enumerate(self.nics):
8922
          if nic.mac == constants.VALUE_AUTO:
8923
            nic_mac_ini = "nic%d_mac" % idx
8924
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8925

    
8926
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8927

    
8928
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8929
    if self.op.ip_check:
8930
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8931
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8932
                                   (self.check_ip, self.op.instance_name),
8933
                                   errors.ECODE_NOTUNIQUE)
8934

    
8935
    #### mac address generation
8936
    # By generating here the mac address both the allocator and the hooks get
8937
    # the real final mac address rather than the 'auto' or 'generate' value.
8938
    # There is a race condition between the generation and the instance object
8939
    # creation, which means that we know the mac is valid now, but we're not
8940
    # sure it will be when we actually add the instance. If things go bad
8941
    # adding the instance will abort because of a duplicate mac, and the
8942
    # creation job will fail.
8943
    for nic in self.nics:
8944
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8945
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8946

    
8947
    #### allocator run
8948

    
8949
    if self.op.iallocator is not None:
8950
      self._RunAllocator()
8951

    
8952
    #### node related checks
8953

    
8954
    # check primary node
8955
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8956
    assert self.pnode is not None, \
8957
      "Cannot retrieve locked node %s" % self.op.pnode
8958
    if pnode.offline:
8959
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8960
                                 pnode.name, errors.ECODE_STATE)
8961
    if pnode.drained:
8962
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8963
                                 pnode.name, errors.ECODE_STATE)
8964
    if not pnode.vm_capable:
8965
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8966
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8967

    
8968
    self.secondaries = []
8969

    
8970
    # mirror node verification
8971
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8972
      if self.op.snode == pnode.name:
8973
        raise errors.OpPrereqError("The secondary node cannot be the"
8974
                                   " primary node", errors.ECODE_INVAL)
8975
      _CheckNodeOnline(self, self.op.snode)
8976
      _CheckNodeNotDrained(self, self.op.snode)
8977
      _CheckNodeVmCapable(self, self.op.snode)
8978
      self.secondaries.append(self.op.snode)
8979

    
8980
    nodenames = [pnode.name] + self.secondaries
8981

    
8982
    if not self.adopt_disks:
8983
      # Check lv size requirements, if not adopting
8984
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8985
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8986

    
8987
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8988
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8989
                                disk[constants.IDISK_ADOPT])
8990
                     for disk in self.disks])
8991
      if len(all_lvs) != len(self.disks):
8992
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8993
                                   errors.ECODE_INVAL)
8994
      for lv_name in all_lvs:
8995
        try:
8996
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8997
          # to ReserveLV uses the same syntax
8998
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8999
        except errors.ReservationError:
9000
          raise errors.OpPrereqError("LV named %s used by another instance" %
9001
                                     lv_name, errors.ECODE_NOTUNIQUE)
9002

    
9003
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9004
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9005

    
9006
      node_lvs = self.rpc.call_lv_list([pnode.name],
9007
                                       vg_names.payload.keys())[pnode.name]
9008
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9009
      node_lvs = node_lvs.payload
9010

    
9011
      delta = all_lvs.difference(node_lvs.keys())
9012
      if delta:
9013
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
9014
                                   utils.CommaJoin(delta),
9015
                                   errors.ECODE_INVAL)
9016
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9017
      if online_lvs:
9018
        raise errors.OpPrereqError("Online logical volumes found, cannot"
9019
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
9020
                                   errors.ECODE_STATE)
9021
      # update the size of disk based on what is found
9022
      for dsk in self.disks:
9023
        dsk[constants.IDISK_SIZE] = \
9024
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9025
                                        dsk[constants.IDISK_ADOPT])][0]))
9026

    
9027
    elif self.op.disk_template == constants.DT_BLOCK:
9028
      # Normalize and de-duplicate device paths
9029
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9030
                       for disk in self.disks])
9031
      if len(all_disks) != len(self.disks):
9032
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
9033
                                   errors.ECODE_INVAL)
9034
      baddisks = [d for d in all_disks
9035
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9036
      if baddisks:
9037
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9038
                                   " cannot be adopted" %
9039
                                   (", ".join(baddisks),
9040
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
9041
                                   errors.ECODE_INVAL)
9042

    
9043
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
9044
                                            list(all_disks))[pnode.name]
9045
      node_disks.Raise("Cannot get block device information from node %s" %
9046
                       pnode.name)
9047
      node_disks = node_disks.payload
9048
      delta = all_disks.difference(node_disks.keys())
9049
      if delta:
9050
        raise errors.OpPrereqError("Missing block device(s): %s" %
9051
                                   utils.CommaJoin(delta),
9052
                                   errors.ECODE_INVAL)
9053
      for dsk in self.disks:
9054
        dsk[constants.IDISK_SIZE] = \
9055
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9056

    
9057
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9058

    
9059
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9060
    # check OS parameters (remotely)
9061
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9062

    
9063
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9064

    
9065
    # memory check on primary node
9066
    if self.op.start:
9067
      _CheckNodeFreeMemory(self, self.pnode.name,
9068
                           "creating instance %s" % self.op.instance_name,
9069
                           self.be_full[constants.BE_MEMORY],
9070
                           self.op.hypervisor)
9071

    
9072
    self.dry_run_result = list(nodenames)
9073

    
9074
  def Exec(self, feedback_fn):
9075
    """Create and add the instance to the cluster.
9076

9077
    """
9078
    instance = self.op.instance_name
9079
    pnode_name = self.pnode.name
9080

    
9081
    ht_kind = self.op.hypervisor
9082
    if ht_kind in constants.HTS_REQ_PORT:
9083
      network_port = self.cfg.AllocatePort()
9084
    else:
9085
      network_port = None
9086

    
9087
    disks = _GenerateDiskTemplate(self,
9088
                                  self.op.disk_template,
9089
                                  instance, pnode_name,
9090
                                  self.secondaries,
9091
                                  self.disks,
9092
                                  self.instance_file_storage_dir,
9093
                                  self.op.file_driver,
9094
                                  0,
9095
                                  feedback_fn)
9096

    
9097
    iobj = objects.Instance(name=instance, os=self.op.os_type,
9098
                            primary_node=pnode_name,
9099
                            nics=self.nics, disks=disks,
9100
                            disk_template=self.op.disk_template,
9101
                            admin_up=False,
9102
                            network_port=network_port,
9103
                            beparams=self.op.beparams,
9104
                            hvparams=self.op.hvparams,
9105
                            hypervisor=self.op.hypervisor,
9106
                            osparams=self.op.osparams,
9107
                            )
9108

    
9109
    if self.op.tags:
9110
      for tag in self.op.tags:
9111
        iobj.AddTag(tag)
9112

    
9113
    if self.adopt_disks:
9114
      if self.op.disk_template == constants.DT_PLAIN:
9115
        # rename LVs to the newly-generated names; we need to construct
9116
        # 'fake' LV disks with the old data, plus the new unique_id
9117
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9118
        rename_to = []
9119
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9120
          rename_to.append(t_dsk.logical_id)
9121
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9122
          self.cfg.SetDiskID(t_dsk, pnode_name)
9123
        result = self.rpc.call_blockdev_rename(pnode_name,
9124
                                               zip(tmp_disks, rename_to))
9125
        result.Raise("Failed to rename adoped LVs")
9126
    else:
9127
      feedback_fn("* creating instance disks...")
9128
      try:
9129
        _CreateDisks(self, iobj)
9130
      except errors.OpExecError:
9131
        self.LogWarning("Device creation failed, reverting...")
9132
        try:
9133
          _RemoveDisks(self, iobj)
9134
        finally:
9135
          self.cfg.ReleaseDRBDMinors(instance)
9136
          raise
9137

    
9138
    feedback_fn("adding instance %s to cluster config" % instance)
9139

    
9140
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9141

    
9142
    # Declare that we don't want to remove the instance lock anymore, as we've
9143
    # added the instance to the config
9144
    del self.remove_locks[locking.LEVEL_INSTANCE]
9145

    
9146
    if self.op.mode == constants.INSTANCE_IMPORT:
9147
      # Release unused nodes
9148
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9149
    else:
9150
      # Release all nodes
9151
      _ReleaseLocks(self, locking.LEVEL_NODE)
9152

    
9153
    disk_abort = False
9154
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9155
      feedback_fn("* wiping instance disks...")
9156
      try:
9157
        _WipeDisks(self, iobj)
9158
      except errors.OpExecError, err:
9159
        logging.exception("Wiping disks failed")
9160
        self.LogWarning("Wiping instance disks failed (%s)", err)
9161
        disk_abort = True
9162

    
9163
    if disk_abort:
9164
      # Something is already wrong with the disks, don't do anything else
9165
      pass
9166
    elif self.op.wait_for_sync:
9167
      disk_abort = not _WaitForSync(self, iobj)
9168
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9169
      # make sure the disks are not degraded (still sync-ing is ok)
9170
      feedback_fn("* checking mirrors status")
9171
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9172
    else:
9173
      disk_abort = False
9174

    
9175
    if disk_abort:
9176
      _RemoveDisks(self, iobj)
9177
      self.cfg.RemoveInstance(iobj.name)
9178
      # Make sure the instance lock gets removed
9179
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9180
      raise errors.OpExecError("There are some degraded disks for"
9181
                               " this instance")
9182

    
9183
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9184
      if self.op.mode == constants.INSTANCE_CREATE:
9185
        if not self.op.no_install:
9186
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9187
                        not self.op.wait_for_sync)
9188
          if pause_sync:
9189
            feedback_fn("* pausing disk sync to install instance OS")
9190
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9191
                                                              iobj.disks, True)
9192
            for idx, success in enumerate(result.payload):
9193
              if not success:
9194
                logging.warn("pause-sync of instance %s for disk %d failed",
9195
                             instance, idx)
9196

    
9197
          feedback_fn("* running the instance OS create scripts...")
9198
          # FIXME: pass debug option from opcode to backend
9199
          os_add_result = \
9200
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9201
                                          self.op.debug_level)
9202
          if pause_sync:
9203
            feedback_fn("* resuming disk sync")
9204
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9205
                                                              iobj.disks, False)
9206
            for idx, success in enumerate(result.payload):
9207
              if not success:
9208
                logging.warn("resume-sync of instance %s for disk %d failed",
9209
                             instance, idx)
9210

    
9211
          os_add_result.Raise("Could not add os for instance %s"
9212
                              " on node %s" % (instance, pnode_name))
9213

    
9214
      elif self.op.mode == constants.INSTANCE_IMPORT:
9215
        feedback_fn("* running the instance OS import scripts...")
9216

    
9217
        transfers = []
9218

    
9219
        for idx, image in enumerate(self.src_images):
9220
          if not image:
9221
            continue
9222

    
9223
          # FIXME: pass debug option from opcode to backend
9224
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9225
                                             constants.IEIO_FILE, (image, ),
9226
                                             constants.IEIO_SCRIPT,
9227
                                             (iobj.disks[idx], idx),
9228
                                             None)
9229
          transfers.append(dt)
9230

    
9231
        import_result = \
9232
          masterd.instance.TransferInstanceData(self, feedback_fn,
9233
                                                self.op.src_node, pnode_name,
9234
                                                self.pnode.secondary_ip,
9235
                                                iobj, transfers)
9236
        if not compat.all(import_result):
9237
          self.LogWarning("Some disks for instance %s on node %s were not"
9238
                          " imported successfully" % (instance, pnode_name))
9239

    
9240
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9241
        feedback_fn("* preparing remote import...")
9242
        # The source cluster will stop the instance before attempting to make a
9243
        # connection. In some cases stopping an instance can take a long time,
9244
        # hence the shutdown timeout is added to the connection timeout.
9245
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9246
                           self.op.source_shutdown_timeout)
9247
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9248

    
9249
        assert iobj.primary_node == self.pnode.name
9250
        disk_results = \
9251
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9252
                                        self.source_x509_ca,
9253
                                        self._cds, timeouts)
9254
        if not compat.all(disk_results):
9255
          # TODO: Should the instance still be started, even if some disks
9256
          # failed to import (valid for local imports, too)?
9257
          self.LogWarning("Some disks for instance %s on node %s were not"
9258
                          " imported successfully" % (instance, pnode_name))
9259

    
9260
        # Run rename script on newly imported instance
9261
        assert iobj.name == instance
9262
        feedback_fn("Running rename script for %s" % instance)
9263
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9264
                                                   self.source_instance_name,
9265
                                                   self.op.debug_level)
9266
        if result.fail_msg:
9267
          self.LogWarning("Failed to run rename script for %s on node"
9268
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9269

    
9270
      else:
9271
        # also checked in the prereq part
9272
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9273
                                     % self.op.mode)
9274

    
9275
    if self.op.start:
9276
      iobj.admin_up = True
9277
      self.cfg.Update(iobj, feedback_fn)
9278
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9279
      feedback_fn("* starting instance...")
9280
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9281
                                            False)
9282
      result.Raise("Could not start instance")
9283

    
9284
    return list(iobj.all_nodes)
9285
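
# Illustrative sketch (not part of the original module): the disk and NIC
# dictionaries consumed by LUInstanceCreate above; all sizes, modes and
# values are hypothetical.
def _ExampleInstanceCreateInput():
  """Sketch: minimal disks/nics input for a plain, single-NIC instance."""
  disks = [{constants.IDISK_SIZE: 10240,
            constants.IDISK_MODE: constants.DISK_RDWR}]
  nics = [{constants.INIC_MODE: constants.NIC_MODE_BRIDGED,
           constants.INIC_MAC: constants.VALUE_AUTO}]
  return disks, nics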

    
9286

    
9287
class LUInstanceConsole(NoHooksLU):
9288
  """Connect to an instance's console.
9289

9290
  This is somewhat special in that it returns the command line that
9291
  you need to run on the master node in order to connect to the
9292
  console.
9293

9294
  """
9295
  REQ_BGL = False
9296

    
9297
  def ExpandNames(self):
9298
    self._ExpandAndLockInstance()
9299

    
9300
  def CheckPrereq(self):
9301
    """Check prerequisites.
9302

9303
    This checks that the instance is in the cluster.
9304

9305
    """
9306
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9307
    assert self.instance is not None, \
9308
      "Cannot retrieve locked instance %s" % self.op.instance_name
9309
    _CheckNodeOnline(self, self.instance.primary_node)
9310

    
9311
  def Exec(self, feedback_fn):
9312
    """Connect to the console of an instance
9313

9314
    """
9315
    instance = self.instance
9316
    node = instance.primary_node
9317

    
9318
    node_insts = self.rpc.call_instance_list([node],
9319
                                             [instance.hypervisor])[node]
9320
    node_insts.Raise("Can't get node information from %s" % node)
9321

    
9322
    if instance.name not in node_insts.payload:
9323
      if instance.admin_up:
9324
        state = constants.INSTST_ERRORDOWN
9325
      else:
9326
        state = constants.INSTST_ADMINDOWN
9327
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9328
                               (instance.name, state))
9329

    
9330
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9331

    
9332
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9333

    
9334

    
9335
def _GetInstanceConsole(cluster, instance):
9336
  """Returns console information for an instance.
9337

9338
  @type cluster: L{objects.Cluster}
9339
  @type instance: L{objects.Instance}
9340
  @rtype: dict
9341

9342
  """
9343
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9344
  # beparams and hvparams are passed separately, to avoid editing the
9345
  # instance and then saving the defaults in the instance itself.
9346
  hvparams = cluster.FillHV(instance)
9347
  beparams = cluster.FillBE(instance)
9348
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9349

    
9350
  assert console.instance == instance.name
9351
  assert console.Validate()
9352

    
9353
  return console.ToDict()
9354
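
# Illustrative sketch (not part of the original module): how the console
# helper above would typically be used; "cfg" and "instance" are
# hypothetical and stand for the cluster configuration object and an
# objects.Instance already looked up.
def _ExampleConsoleInfo(cfg, instance):
  """Sketch: fetch the console description for an instance."""
  # The returned dict describes how to reach the console; it is built by
  # the hypervisor abstraction and validated before being returned.
  return _GetInstanceConsole(cfg.GetClusterInfo(), instance)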

    
9355

    
9356
class LUInstanceReplaceDisks(LogicalUnit):
9357
  """Replace the disks of an instance.
9358

9359
  """
9360
  HPATH = "mirrors-replace"
9361
  HTYPE = constants.HTYPE_INSTANCE
9362
  REQ_BGL = False
9363

    
9364
  def CheckArguments(self):
9365
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9366
                                  self.op.iallocator)
9367

    
9368
  def ExpandNames(self):
9369
    self._ExpandAndLockInstance()
9370

    
9371
    assert locking.LEVEL_NODE not in self.needed_locks
9372
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9373

    
9374
    assert self.op.iallocator is None or self.op.remote_node is None, \
9375
      "Conflicting options"
9376

    
9377
    if self.op.remote_node is not None:
9378
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9379

    
9380
      # Warning: do not remove the locking of the new secondary here
9381
      # unless DRBD8.AddChildren is changed to work in parallel;
9382
      # currently it doesn't since parallel invocations of
9383
      # FindUnusedMinor will conflict
9384
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9385
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9386
    else:
9387
      self.needed_locks[locking.LEVEL_NODE] = []
9388
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9389

    
9390
      if self.op.iallocator is not None:
9391
        # iallocator will select a new node in the same group
9392
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9393

    
9394
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9395
                                   self.op.iallocator, self.op.remote_node,
9396
                                   self.op.disks, False, self.op.early_release)
9397

    
9398
    self.tasklets = [self.replacer]
9399

    
9400
  def DeclareLocks(self, level):
9401
    if level == locking.LEVEL_NODEGROUP:
9402
      assert self.op.remote_node is None
9403
      assert self.op.iallocator is not None
9404
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9405

    
9406
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9407
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9408
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9409

    
9410
    elif level == locking.LEVEL_NODE:
9411
      if self.op.iallocator is not None:
9412
        assert self.op.remote_node is None
9413
        assert not self.needed_locks[locking.LEVEL_NODE]
9414

    
9415
        # Lock member nodes of all locked groups
9416
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9417
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9418
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9419
      else:
9420
        self._LockInstancesNodes()
9421

    
9422
  def BuildHooksEnv(self):
9423
    """Build hooks env.
9424

9425
    This runs on the master, the primary and all the secondaries.
9426

9427
    """
9428
    instance = self.replacer.instance
9429
    env = {
9430
      "MODE": self.op.mode,
9431
      "NEW_SECONDARY": self.op.remote_node,
9432
      "OLD_SECONDARY": instance.secondary_nodes[0],
9433
      }
9434
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9435
    return env
9436

    
9437
  def BuildHooksNodes(self):
9438
    """Build hooks nodes.
9439

9440
    """
9441
    instance = self.replacer.instance
9442
    nl = [
9443
      self.cfg.GetMasterNode(),
9444
      instance.primary_node,
9445
      ]
9446
    if self.op.remote_node is not None:
9447
      nl.append(self.op.remote_node)
9448
    return nl, nl
9449

    
9450
  def CheckPrereq(self):
9451
    """Check prerequisites.
9452

9453
    """
9454
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9455
            self.op.iallocator is None)
9456

    
9457
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9458
    if owned_groups:
9459
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9460

    
9461
    return LogicalUnit.CheckPrereq(self)
9462
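
# Illustrative sketch (not part of the original module): the argument
# combinations accepted by TLReplaceDisks.CheckArguments below; the node
# name is hypothetical.
def _ExampleReplaceDisksArguments():
  """Sketch: mode/remote_node/iallocator combinations for replace-disks."""
  # Changing the secondary needs exactly one of remote_node or iallocator.
  TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                "node3.example.com", None)
  # The other modes must leave both unset.
  TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_PRI, None, None)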

    
9463

    
9464
class TLReplaceDisks(Tasklet):
9465
  """Replaces disks for an instance.
9466

9467
  Note: Locking is not within the scope of this class.
9468

9469
  """
9470
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9471
               disks, delay_iallocator, early_release):
9472
    """Initializes this class.
9473

9474
    """
9475
    Tasklet.__init__(self, lu)
9476

    
9477
    # Parameters
9478
    self.instance_name = instance_name
9479
    self.mode = mode
9480
    self.iallocator_name = iallocator_name
9481
    self.remote_node = remote_node
9482
    self.disks = disks
9483
    self.delay_iallocator = delay_iallocator
9484
    self.early_release = early_release
9485

    
9486
    # Runtime data
9487
    self.instance = None
9488
    self.new_node = None
9489
    self.target_node = None
9490
    self.other_node = None
9491
    self.remote_node_info = None
9492
    self.node_secondary_ip = None
9493

    
9494
  @staticmethod
9495
  def CheckArguments(mode, remote_node, iallocator):
9496
    """Helper function for users of this class.
9497

9498
    """
9499
    # check for valid parameter combination
9500
    if mode == constants.REPLACE_DISK_CHG:
9501
      if remote_node is None and iallocator is None:
9502
        raise errors.OpPrereqError("When changing the secondary either an"
9503
                                   " iallocator script must be used or the"
9504
                                   " new node given", errors.ECODE_INVAL)
9505

    
9506
      if remote_node is not None and iallocator is not None:
9507
        raise errors.OpPrereqError("Give either the iallocator or the new"
9508
                                   " secondary, not both", errors.ECODE_INVAL)
9509

    
9510
    elif remote_node is not None or iallocator is not None:
9511
      # Not replacing the secondary
9512
      raise errors.OpPrereqError("The iallocator and new node options can"
9513
                                 " only be used when changing the"
9514
                                 " secondary node", errors.ECODE_INVAL)
9515

    
9516
  @staticmethod
9517
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9518
    """Compute a new secondary node using an IAllocator.
9519

9520
    """
9521
    ial = IAllocator(lu.cfg, lu.rpc,
9522
                     mode=constants.IALLOCATOR_MODE_RELOC,
9523
                     name=instance_name,
9524
                     relocate_from=list(relocate_from))
9525

    
9526
    ial.Run(iallocator_name)
9527

    
9528
    if not ial.success:
9529
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9530
                                 " %s" % (iallocator_name, ial.info),
9531
                                 errors.ECODE_NORES)
9532

    
9533
    if len(ial.result) != ial.required_nodes:
9534
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9535
                                 " of nodes (%s), required %s" %
9536
                                 (iallocator_name,
9537
                                  len(ial.result), ial.required_nodes),
9538
                                 errors.ECODE_FAULT)
9539

    
9540
    remote_node_name = ial.result[0]
9541

    
9542
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9543
               instance_name, remote_node_name)
9544

    
9545
    return remote_node_name
9546

    
  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_nodes, self.node_secondary_ip.keys()))

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]))
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                             logical_id=(vg_meta, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names
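
  # Shape of the mapping built above (values are illustrative):
  #
  #   iv_names = {
  #     "disk/0": (drbd_disk,                   # the DRBD8 Disk object
  #                [old_data_lv, old_meta_lv],  # copies of dev.children
  #                [new_data_lv, new_meta_lv]), # freshly created LVs
  #   }
  #
  # i.e. keyed by the disk's iv_name, mapping to the disk itself, the old
  # LV children and the newly allocated data/meta LV pair.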

    
  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
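
  # Worked example of the rename dance in step 4 above (LV names are
  # illustrative): if disk/0's data LV is "xenvg/abc.disk0_data", the old
  # LV is first renamed to "abc.disk0_data_replaced-<time_t>", the newly
  # created LV (which was given a fresh unique name) is then renamed to
  # "abc.disk0_data" and re-attached to the DRBD device, and the
  # "_replaced-*" volume is what _RemoveOldStorage deletes afterwards.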

    
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.instance.primary_node,
                           self.target_node,
                           self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    self.lock_nodes = set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES

    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
      inst_fn = _GetNodeInstances

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups optimistically, needs verification once nodes have
      # been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    assert owned_nodes == self.lock_nodes

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self.op.mode,
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op
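
# Usage sketch for the helper above (illustrative, mirrors how
# _LoadNodeEvacResult uses it below): opcodes that support the flag get it
# set, all others are passed through unchanged, e.g.:
#
#   op = _SetOpEarlyRelease(True, opcodes.OpInstanceReplaceDisks(...))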

    

def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    lu.LogWarning("Unable to evacuate instances %s",
                  utils.CommaJoin("%s (%s)" % (name, reason)
                                  for (name, reason) in failed))

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]
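
# Illustrative shape of alloc_result as unpacked above (all values made up):
#
#   moved  = [("inst1.example.com", "group2", ["node3.example.com"])]
#   failed = [("inst2.example.com", "not enough memory")]
#   jobs   = [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}, ...], ...]
#
# Each inner list in "jobs" becomes one submitted job of deserialized
# opcodes, with early_release applied where the opcode supports it.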

    

class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)
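
  # Example of the tuple assembled above (values are illustrative): for a
  # DRBD disk this could look like
  #   ("/dev/drbd0", 147, 0, 90.5, 30, False, <ldisk status constant>)
  # matching (dev_path, major, minor, sync_percent, estimated_time,
  # is_degraded, ldisk_status); None is returned instead when only static
  # data was requested or the node could not be queried.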

    
  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"

      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.
10765

10766
  """
10767
  HPATH = "instance-modify"
10768
  HTYPE = constants.HTYPE_INSTANCE
10769
  REQ_BGL = False
10770

    
10771
  def CheckArguments(self):
10772
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10773
            self.op.hvparams or self.op.beparams or self.op.os_name):
10774
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10775

    
10776
    if self.op.hvparams:
10777
      _CheckGlobalHvParams(self.op.hvparams)
10778

    
10779
    # Disk validation
10780
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get(constants.IDISK_SIZE, None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict[constants.IDISK_SIZE] = size
      else:
        # modification of disk
        if constants.IDISK_SIZE in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get(constants.INIC_IP, None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict[constants.INIC_IP] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get("bridge", None)
      nic_link = nic_dict.get(constants.INIC_LINK, None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict["bridge"] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict[constants.INIC_LINK] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
        if nic_mac is None:
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

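  # Illustrative note (hypothetical values, not taken from the opcode
  # definitions): self.op.disks and self.op.nics are lists of
  # (operation, parameters) pairs as validated in CheckArguments above, e.g.
  #   disks=[(constants.DDM_ADD, {constants.IDISK_SIZE: 1024})]
  #   nics=[(0, {constants.INIC_IP: "192.0.2.10"})]
  # where an integer in place of DDM_ADD/DDM_REMOVE addresses an existing
  # device by index.
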
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args["memory"] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args["nics"] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if constants.INIC_IP in this_nic_override:
          ip = this_nic_override[constants.INIC_IP]
        else:
          ip = nic.ip
        if constants.INIC_MAC in this_nic_override:
          mac = this_nic_override[constants.INIC_MAC]
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args["nics"][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceDown(self, instance, "cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)

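    # Illustrative example (hypothetical mask): a mask parsed into
    # cpu_list == [[0, 1], [2, 3]] requires BE_VCPUS == 2; its highest CPU
    # number is 3, so every node must expose at least 4 physical CPUs.
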
    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload["memory"])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
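        # Worked example (illustrative): raising BE_MEMORY to 2048 MB while
        # the instance currently uses 512 MB and the primary node has 1024 MB
        # free gives miss_mem = 2048 - 512 - 1024 = 512 > 0, so the request
        # is rejected below.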
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload["memory_free"])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          if not isinstance(nres.payload.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if "bridge" in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError("Cannot set the nic ip to None"
                                     " on a routed nic", errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError("Cannot set the nic mac to None",
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    return

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
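    # For a DRBD8 disk the children are the data LV and the metadata LV (in
    # that order), so keeping children[0] below retains the data volume; the
    # metadata volumes are removed from the primary node further down.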
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
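    # The value returned by Exec is a list of (parameter, new value) pairs,
    # e.g. [("disk/1", "add:size=1024,mode=rw"), ("be/memory", 512)]; the
    # values shown here are purely illustrative.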
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                        constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
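    # Illustrative result shape (hypothetical names):
    #   {"node1.example.com": ["inst1.example.com"],
    #    "node2.example.com": False}
    # where False marks a node whose export-list RPC failed.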
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

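      # The "x509_key_name" triple returned below is meant to be passed back
      # unchanged as the x509_key_name argument of the export opcode;
      # LUBackupExport.CheckPrereq re-verifies it with utils.VerifySha1Hmac
      # against the cluster domain secret.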
      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down first")

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal;
    # if we proceed, the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node,
                                                (instance, None, None), False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    # FIXME: Depends on side-effects of modifying the result of
    # C{cfg.GetAllNodesInfo}

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and end up split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
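
  # Illustrative sketch only (hypothetical node, group and instance names, not
  # used anywhere in this module): with nodes "node1" and "node2" both in group
  # "uuid-g1" and a DRBD instance "inst1" spanning them, a call such as
  #
  #   CheckAssignmentForSplitInstances([("node1", "uuid-g2")],
  #                                    node_data, instance_data)
  #
  # would be expected to report "inst1" as newly split (first list) and nothing
  # as previously split (second list), since only one of its nodes moves to
  # "uuid-g2".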


class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the (path, tag) pairs matching the pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
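
  # For illustration only (hypothetical names): a pattern such as "^db" could
  # yield results of the form
  #   [("/instances/inst1.example.com", "db_primary"),
  #    ("/nodes/node1.example.com", "dbserver")]
  # i.e. (path, tag) pairs for every tagged cluster, node, node group or
  # instance object whose tags match the regular expression.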


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group-change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
     constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
     constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
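
  # Illustrative sketch of the "request" section built for
  # IALLOCATOR_MODE_ALLOC by _AddNewInstance/_BuildInputData; only the keys
  # come from the code above, all values below are hypothetical:
  #
  #   {"type": "allocate", "name": "inst1.example.com", "memory": 1024,
  #    "vcpus": 1, "os": "debootstrap+default", "disk_template": "drbd",
  #    "disks": [{"size": 10240, "mode": "rw"}], "disk_space_total": 10368,
  #    "nics": [...], "required_nodes": 2, "tags": [],
  #    "hypervisor": "xen-pvm"}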
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
13547

    
13548

    
13549
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction
    and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
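      # Hypothetical example of a valid "disks" entry checked below: a dict
      # with an integer size (assumed MiB) and a mode from
      # constants.DISK_ACCESS_SET, e.g.
      #   {constants.IDISK_SIZE: 1024,
      #    constants.IDISK_MODE: constants.DISK_RDWR}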
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

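    # Direction IALLOCATOR_DIR_IN only returns the request text that would
    # be sent to the allocator; IALLOCATOR_DIR_OUT runs the named allocator
    # script and returns its raw, unvalidated reply.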
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
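
# Illustrative caller sketch (hypothetical, not a verbatim excerpt): query
# opcodes resolve the implementation class first and then instantiate it,
# along the lines of
#   impl_cls = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#   # impl_cls is then constructed with the parsed filter and field list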