lib/cmdlib.py @ e3ac8406
1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62
from ganeti import rpc
63

    
64
import ganeti.masterd.instance # pylint: disable=W0611
65

    
66

    
67
#: Size of DRBD meta block device
68
DRBD_META_SIZE = 128
69

    
70

    
71
class ResultWithJobs:
72
  """Data container for LU results with jobs.
73

74
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
75
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
76
  contained in the C{jobs} attribute and include the job IDs in the opcode
77
  result.
78

79
  """
80
  def __init__(self, jobs, **kwargs):
81
    """Initializes this class.
82

83
    Additional return values can be specified as keyword arguments.
84

85
    @type jobs: list of lists of L{opcode.OpCode}
86
    @param jobs: A list of lists of opcode objects
87

88
    """
89
    self.jobs = jobs
90
    self.other = kwargs
91
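# Editor's note (illustrative sketch, not part of the original module): an
# LU's Exec method can hand follow-up jobs back to the master daemon by
# returning a ResultWithJobs, e.g. (opcode list and keyword are hypothetical):
#
#   def Exec(self, feedback_fn):
#     ops = [opcodes.OpClusterVerifyGroup(group_name=g) for g in group_names]
#     return ResultWithJobs([ops], submitted_for=group_names)
#
# mcpu.Processor._ProcessResult then submits the jobs and includes their job
# IDs in the opcode result; the extra keyword arguments end up in C{other}.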

    
92

    
93
class LogicalUnit(object):
94
  """Logical Unit base class.
95

96
  Subclasses must follow these rules:
97
    - implement ExpandNames
98
    - implement CheckPrereq (except when tasklets are used)
99
    - implement Exec (except when tasklets are used)
100
    - implement BuildHooksEnv
101
    - implement BuildHooksNodes
102
    - redefine HPATH and HTYPE
103
    - optionally redefine their run requirements:
104
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
105

106
  Note that all commands require root permissions.
107

108
  @ivar dry_run_result: the value (if any) that will be returned to the caller
109
      in dry-run mode (signalled by opcode dry_run parameter)
110

111
  """
112
  HPATH = None
113
  HTYPE = None
114
  REQ_BGL = True
115

    
116
  def __init__(self, processor, op, context, rpc_runner):
117
    """Constructor for LogicalUnit.
118

119
    This needs to be overridden in derived classes in order to check op
120
    validity.
121

122
    """
123
    self.proc = processor
124
    self.op = op
125
    self.cfg = context.cfg
126
    self.glm = context.glm
127
    # readability alias
128
    self.owned_locks = context.glm.list_owned
129
    self.context = context
130
    self.rpc = rpc_runner
131
    # Dicts used to declare locking needs to mcpu
132
    self.needed_locks = None
133
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
134
    self.add_locks = {}
135
    self.remove_locks = {}
136
    # Used to force good behavior when calling helper functions
137
    self.recalculate_locks = {}
138
    # logging
139
    self.Log = processor.Log # pylint: disable=C0103
140
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
141
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
142
    self.LogStep = processor.LogStep # pylint: disable=C0103
143
    # support for dry-run
144
    self.dry_run_result = None
145
    # support for generic debug attribute
146
    if (not hasattr(self.op, "debug_level") or
147
        not isinstance(self.op.debug_level, int)):
148
      self.op.debug_level = 0
149

    
150
    # Tasklets
151
    self.tasklets = None
152

    
153
    # Validate opcode parameters and set defaults
154
    self.op.Validate(True)
155

    
156
    self.CheckArguments()
157

    
158
  def CheckArguments(self):
159
    """Check syntactic validity for the opcode arguments.
160

161
    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.
172

173
    """
174
    pass
175

    
176
  def ExpandNames(self):
177
    """Expand names for this LU.
178

179
    This method is called before starting to execute the opcode, and it should
180
    update all the parameters of the opcode to their canonical form (e.g. a
181
    short node name must be fully expanded after this method has successfully
182
    completed). This way locking, hooks, logging, etc. can work correctly.
183

184
    LUs which implement this method must also populate the self.needed_locks
185
    member, as a dict with lock levels as keys, and a list of needed lock names
186
    as values. Rules:
187

188
      - use an empty dict if you don't need any lock
189
      - if you don't need any lock at a particular level omit that level
190
      - don't put anything for the BGL level
191
      - if you want all locks at a level use locking.ALL_SET as a value
192

193
    If you need to share locks (rather than acquire them exclusively) at one
194
    level you can modify self.share_locks, setting a true value (usually 1) for
195
    that level. By default locks are not shared.
196

197
    This function can also define a list of tasklets, which then will be
198
    executed in order instead of the usual LU-level CheckPrereq and Exec
199
    functions, if those are not defined by the LU.
200

201
    Examples::
202

203
      # Acquire all nodes and one instance
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: locking.ALL_SET,
206
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
207
      }
208
      # Acquire just two nodes
209
      self.needed_locks = {
210
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
211
      }
212
      # Acquire no locks
213
      self.needed_locks = {} # No, you can't leave it to the default value None
214

215
    """
216
    # The implementation of this method is mandatory only if the new LU is
217
    # concurrent, so that old LUs don't need to be changed all at the same
218
    # time.
219
    if self.REQ_BGL:
220
      self.needed_locks = {} # Exclusive LUs don't need locks.
221
    else:
222
      raise NotImplementedError
223

    
224
  def DeclareLocks(self, level):
225
    """Declare LU locking needs for a level
226

227
    While most LUs can just declare their locking needs at ExpandNames time,
228
    sometimes there's the need to calculate some locks after having acquired
229
    the ones before. This function is called just before acquiring locks at a
230
    particular level, but after acquiring the ones at lower levels, and permits
231
    such calculations. It can be used to modify self.needed_locks, and by
232
    default it does nothing.
233

234
    This function is only called if you have something already set in
235
    self.needed_locks for the level.
236

237
    @param level: Locking level which is going to be locked
238
    @type level: member of ganeti.locking.LEVELS
239

240
    """
241

    
242
  def CheckPrereq(self):
243
    """Check prerequisites for this LU.
244

245
    This method should check that the prerequisites for the execution
246
    of this LU are fulfilled. It can do internode communication, but
247
    it should be idempotent - no cluster or system changes are
248
    allowed.
249

250
    The method should raise errors.OpPrereqError in case something is
251
    not fulfilled. Its return value is ignored.
252

253
    This method should also update all the parameters of the opcode to
254
    their canonical form if it hasn't been done by ExpandNames before.
255

256
    """
257
    if self.tasklets is not None:
258
      for (idx, tl) in enumerate(self.tasklets):
259
        logging.debug("Checking prerequisites for tasklet %s/%s",
260
                      idx + 1, len(self.tasklets))
261
        tl.CheckPrereq()
262
    else:
263
      pass
264

    
265
  def Exec(self, feedback_fn):
266
    """Execute the LU.
267

268
    This method should implement the actual work. It should raise
269
    errors.OpExecError for failures that are somewhat dealt with in
270
    code, or expected.
271

272
    """
273
    if self.tasklets is not None:
274
      for (idx, tl) in enumerate(self.tasklets):
275
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
276
        tl.Exec(feedback_fn)
277
    else:
278
      raise NotImplementedError
279

    
280
  def BuildHooksEnv(self):
281
    """Build hooks environment for this LU.
282

283
    @rtype: dict
284
    @return: Dictionary containing the environment that will be used for
285
      running the hooks for this LU. The keys of the dict must not be prefixed
286
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
287
      will extend the environment with additional variables. If no environment
288
      should be defined, an empty dictionary should be returned (not C{None}).
289
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
290
      will not be called.
291

292
    """
293
    raise NotImplementedError
294

    
295
  def BuildHooksNodes(self):
296
    """Build list of nodes to run LU's hooks.
297

298
    @rtype: tuple; (list, list)
299
    @return: Tuple containing a list of node names on which the hook
300
      should run before the execution and a list of node names on which the
301
      hook should run after the execution. No nodes should be returned as an
302
      empty list (and not None).
303
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
304
      will not be called.
305

306
    """
307
    raise NotImplementedError
308

    
309
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
310
    """Notify the LU about the results of its hooks.
311

312
    This method is called every time a hooks phase is executed, and notifies
313
    the Logical Unit about the hooks' result. The LU can then use it to alter
314
    its result based on the hooks.  By default the method does nothing and the
315
    previous result is passed back unchanged but any LU can define it if it
316
    wants to use the local cluster hook-scripts somehow.
317

318
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
319
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
320
    @param hook_results: the results of the multi-node hooks rpc call
321
    @param feedback_fn: function used to send feedback back to the caller
322
    @param lu_result: the previous Exec result this LU had, or None
323
        in the PRE phase
324
    @return: the new Exec result, based on the previous result
325
        and hook results
326

327
    """
328
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function pylint warnings
330
    # pylint: disable=W0613,R0201
331
    return lu_result
332

    
333
  def _ExpandAndLockInstance(self):
334
    """Helper function to expand and lock an instance.
335

336
    Many LUs that work on an instance take its name in self.op.instance_name
337
    and need to expand it and then declare the expanded name for locking. This
338
    function does it, and then updates self.op.instance_name to the expanded
339
    name. It also initializes needed_locks as a dict, if this hasn't been done
340
    before.
341

342
    """
343
    if self.needed_locks is None:
344
      self.needed_locks = {}
345
    else:
346
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
347
        "_ExpandAndLockInstance called with instance-level locks set"
348
    self.op.instance_name = _ExpandInstanceName(self.cfg,
349
                                                self.op.instance_name)
350
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
351

    
352
  def _LockInstancesNodes(self, primary_only=False):
353
    """Helper function to declare instances' nodes for locking.
354

355
    This function should be called after locking one or more instances to lock
356
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
357
    with all primary or secondary nodes for instances already locked and
358
    present in self.needed_locks[locking.LEVEL_INSTANCE].
359

360
    It should be called from DeclareLocks, and for safety only works if
361
    self.recalculate_locks[locking.LEVEL_NODE] is set.
362

363
    In the future it may grow parameters to just lock some instance's nodes, or
364
    to just lock primaries or secondary nodes, if needed.
365

366
    It should be called from DeclareLocks in a way similar to::
367

368
      if level == locking.LEVEL_NODE:
369
        self._LockInstancesNodes()
370

371
    @type primary_only: boolean
372
    @param primary_only: only lock primary nodes of locked instances
373

374
    """
375
    assert locking.LEVEL_NODE in self.recalculate_locks, \
376
      "_LockInstancesNodes helper function called with no nodes to recalculate"
377

    
378
    # TODO: check that we've really been called with the instance locks held
379

    
380
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
381
    # future we might want to have different behaviors depending on the value
382
    # of self.recalculate_locks[locking.LEVEL_NODE]
383
    wanted_nodes = []
384
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
385
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
386
      wanted_nodes.append(instance.primary_node)
387
      if not primary_only:
388
        wanted_nodes.extend(instance.secondary_nodes)
389

    
390
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
391
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
392
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
393
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
394

    
395
    del self.recalculate_locks[locking.LEVEL_NODE]
396
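# Editor's note: a hedged sketch (hypothetical LU, not from this file) of how
# the two helpers above are typically combined:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()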

    
397

    
398
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
399
  """Simple LU which runs no hooks.
400

401
  This LU is intended as a parent for other LogicalUnits which will
402
  run no hooks, in order to reduce duplicate code.
403

404
  """
405
  HPATH = None
406
  HTYPE = None
407

    
408
  def BuildHooksEnv(self):
409
    """Empty BuildHooksEnv for NoHooksLu.
410

411
    This just raises an error.
412

413
    """
414
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
415

    
416
  def BuildHooksNodes(self):
417
    """Empty BuildHooksNodes for NoHooksLU.
418

419
    """
420
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
421

    
422

    
423
class Tasklet:
424
  """Tasklet base class.
425

426
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
427
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
428
  tasklets know nothing about locks.
429

430
  Subclasses must follow these rules:
431
    - Implement CheckPrereq
432
    - Implement Exec
433

434
  """
435
  def __init__(self, lu):
436
    self.lu = lu
437

    
438
    # Shortcuts
439
    self.cfg = lu.cfg
440
    self.rpc = lu.rpc
441

    
442
  def CheckPrereq(self):
443
    """Check prerequisites for this tasklets.
444

445
    This method should check whether the prerequisites for the execution of
446
    this tasklet are fulfilled. It can do internode communication, but it
447
    should be idempotent - no cluster or system changes are allowed.
448

449
    The method should raise errors.OpPrereqError in case something is not
450
    fulfilled. Its return value is ignored.
451

452
    This method should also update all parameters to their canonical form if it
453
    hasn't been done before.
454

455
    """
456
    pass
457

    
458
  def Exec(self, feedback_fn):
459
    """Execute the tasklet.
460

461
    This method should implement the actual work. It should raise
462
    errors.OpExecError for failures that are somewhat dealt with in code, or
463
    expected.
464

465
    """
466
    raise NotImplementedError
467
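# Editor's note: an illustrative sketch of how an LU delegates its work to
# tasklets; the tasklet class name below is hypothetical. CheckPrereq and
# Exec in LogicalUnit above then iterate over self.tasklets automatically:
#
#   def ExpandNames(self):
#     ...
#     self.tasklets = [_SomeMigrationTasklet(self, instance_name)
#                      for instance_name in names]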

    
468

    
469
class _QueryBase:
470
  """Base for query utility classes.
471

472
  """
473
  #: Attribute holding field definitions
474
  FIELDS = None
475

    
476
  def __init__(self, qfilter, fields, use_locking):
477
    """Initializes this class.
478

479
    """
480
    self.use_locking = use_locking
481

    
482
    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
483
                             namefield="name")
484
    self.requested_data = self.query.RequestedData()
485
    self.names = self.query.RequestedNames()
486

    
487
    # Sort only if no names were requested
488
    self.sort_by_name = not self.names
489

    
490
    self.do_locking = None
491
    self.wanted = None
492

    
493
  def _GetNames(self, lu, all_names, lock_level):
494
    """Helper function to determine names asked for in the query.
495

496
    """
497
    if self.do_locking:
498
      names = lu.owned_locks(lock_level)
499
    else:
500
      names = all_names
501

    
502
    if self.wanted == locking.ALL_SET:
503
      assert not self.names
504
      # caller didn't specify names, so ordering is not important
505
      return utils.NiceSort(names)
506

    
507
    # caller specified names and we must keep the same order
508
    assert self.names
509
    assert not self.do_locking or lu.glm.is_owned(lock_level)
510

    
511
    missing = set(self.wanted).difference(names)
512
    if missing:
513
      raise errors.OpExecError("Some items were removed before retrieving"
514
                               " their data: %s" % missing)
515

    
516
    # Return expanded names
517
    return self.wanted
518

    
519
  def ExpandNames(self, lu):
520
    """Expand names for this query.
521

522
    See L{LogicalUnit.ExpandNames}.
523

524
    """
525
    raise NotImplementedError()
526

    
527
  def DeclareLocks(self, lu, level):
528
    """Declare locks for this query.
529

530
    See L{LogicalUnit.DeclareLocks}.
531

532
    """
533
    raise NotImplementedError()
534

    
535
  def _GetQueryData(self, lu):
536
    """Collects all data for this query.
537

538
    @return: Query data object
539

540
    """
541
    raise NotImplementedError()
542

    
543
  def NewStyleQuery(self, lu):
544
    """Collect data and execute query.
545

546
    """
547
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
548
                                  sort_by_name=self.sort_by_name)
549

    
550
  def OldStyleQuery(self, lu):
551
    """Collect data and execute query.
552

553
    """
554
    return self.query.OldStyleQuery(self._GetQueryData(lu),
555
                                    sort_by_name=self.sort_by_name)
556
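# Editor's note: a hedged sketch of the subclass contract of _QueryBase
# (class and field names below are illustrative, not taken from this file):
#
#   class _ExampleNodeQuery(_QueryBase):
#     FIELDS = query.NODE_FIELDS
#
#     def ExpandNames(self, lu):
#       lu.needed_locks = {}
#       self.wanted = self._GetNames(lu, lu.cfg.GetNodeList(),
#                                    locking.LEVEL_NODE)
#
#     def _GetQueryData(self, lu):
#       # build and return the query data object for self.wanted
#       ...
#
# NewStyleQuery/OldStyleQuery above then format the collected data.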

    
557

    
558
def _ShareAll():
559
  """Returns a dict declaring all lock levels shared.
560

561
  """
562
  return dict.fromkeys(locking.LEVELS, 1)
563

    
564

    
565
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
566
  """Checks if the owned node groups are still correct for an instance.
567

568
  @type cfg: L{config.ConfigWriter}
569
  @param cfg: The cluster configuration
570
  @type instance_name: string
571
  @param instance_name: Instance name
572
  @type owned_groups: set or frozenset
573
  @param owned_groups: List of currently owned node groups
574

575
  """
576
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
577

    
578
  if not owned_groups.issuperset(inst_groups):
579
    raise errors.OpPrereqError("Instance %s's node groups changed since"
580
                               " locks were acquired, current groups are"
581
                               " are '%s', owning groups '%s'; retry the"
582
                               " operation" %
583
                               (instance_name,
584
                                utils.CommaJoin(inst_groups),
585
                                utils.CommaJoin(owned_groups)),
586
                               errors.ECODE_STATE)
587

    
588
  return inst_groups
589

    
590

    
591
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
592
  """Checks if the instances in a node group are still correct.
593

594
  @type cfg: L{config.ConfigWriter}
595
  @param cfg: The cluster configuration
596
  @type group_uuid: string
597
  @param group_uuid: Node group UUID
598
  @type owned_instances: set or frozenset
599
  @param owned_instances: List of currently owned instances
600

601
  """
602
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
603
  if owned_instances != wanted_instances:
604
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
605
                               " locks were acquired, wanted '%s', have '%s';"
606
                               " retry the operation" %
607
                               (group_uuid,
608
                                utils.CommaJoin(wanted_instances),
609
                                utils.CommaJoin(owned_instances)),
610
                               errors.ECODE_STATE)
611

    
612
  return wanted_instances
613

    
614

    
615
def _SupportsOob(cfg, node):
616
  """Tells if node supports OOB.
617

618
  @type cfg: L{config.ConfigWriter}
619
  @param cfg: The cluster configuration
620
  @type node: L{objects.Node}
621
  @param node: The node
622
  @return: The OOB script if supported or an empty string otherwise
623

624
  """
625
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
626

    
627

    
628
def _GetWantedNodes(lu, nodes):
629
  """Returns list of checked and expanded node names.
630

631
  @type lu: L{LogicalUnit}
632
  @param lu: the logical unit on whose behalf we execute
633
  @type nodes: list
634
  @param nodes: list of node names or None for all nodes
635
  @rtype: list
636
  @return: the list of nodes, sorted
637
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
638

639
  """
640
  if nodes:
641
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
642

    
643
  return utils.NiceSort(lu.cfg.GetNodeList())
644

    
645

    
646
def _GetWantedInstances(lu, instances):
647
  """Returns list of checked and expanded instance names.
648

649
  @type lu: L{LogicalUnit}
650
  @param lu: the logical unit on whose behalf we execute
651
  @type instances: list
652
  @param instances: list of instance names or None for all instances
653
  @rtype: list
654
  @return: the list of instances, sorted
655
  @raise errors.OpPrereqError: if the instances parameter is wrong type
656
  @raise errors.OpPrereqError: if any of the passed instances is not found
657

658
  """
659
  if instances:
660
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
661
  else:
662
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
663
  return wanted
664

    
665

    
666
def _GetUpdatedParams(old_params, update_dict,
667
                      use_default=True, use_none=False):
668
  """Return the new version of a parameter dictionary.
669

670
  @type old_params: dict
671
  @param old_params: old parameters
672
  @type update_dict: dict
673
  @param update_dict: dict containing new parameter values, or
674
      constants.VALUE_DEFAULT to reset the parameter to its default
675
      value
676
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
682
  @rtype: dict
683
  @return: the new parameter dictionary
684

685
  """
686
  params_copy = copy.deepcopy(old_params)
687
  for key, val in update_dict.iteritems():
688
    if ((use_default and val == constants.VALUE_DEFAULT) or
689
        (use_none and val is None)):
690
      try:
691
        del params_copy[key]
692
      except KeyError:
693
        pass
694
    else:
695
      params_copy[key] = val
696
  return params_copy
697
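# Editor's note: worked examples of the update semantics above (hypothetical
# values; assumes constants.VALUE_DEFAULT is the "reset to default" marker):
#
#   _GetUpdatedParams({"a": 1, "b": 2}, {"a": constants.VALUE_DEFAULT, "c": 3})
#   # -> {"b": 2, "c": 3}    ("a" reverts to its default, "c" is added)
#
#   _GetUpdatedParams({"a": 1}, {"a": None}, use_none=True)
#   # -> {}                  (None deletes the key when use_none is True)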

    
698

    
699
def _ReleaseLocks(lu, level, names=None, keep=None):
700
  """Releases locks owned by an LU.
701

702
  @type lu: L{LogicalUnit}
703
  @param level: Lock level
704
  @type names: list or None
705
  @param names: Names of locks to release
706
  @type keep: list or None
707
  @param keep: Names of locks to retain
708

709
  """
710
  assert not (keep is not None and names is not None), \
711
         "Only one of the 'names' and the 'keep' parameters can be given"
712

    
713
  if names is not None:
714
    should_release = names.__contains__
715
  elif keep:
716
    should_release = lambda name: name not in keep
717
  else:
718
    should_release = None
719

    
720
  if should_release:
721
    retain = []
722
    release = []
723

    
724
    # Determine which locks to release
725
    for name in lu.owned_locks(level):
726
      if should_release(name):
727
        release.append(name)
728
      else:
729
        retain.append(name)
730

    
731
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
732

    
733
    # Release just some locks
734
    lu.glm.release(level, names=release)
735

    
736
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
737
  else:
738
    # Release everything
739
    lu.glm.release(level)
740

    
741
    assert not lu.glm.is_owned(level), "No locks should be owned"
742
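# Editor's note: illustrative calls to the helper above (node names are
# hypothetical):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[pnode, snode])  # keep two
#   _ReleaseLocks(self, locking.LEVEL_INSTANCE)                   # release all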

    
743

    
744
def _MapInstanceDisksToNodes(instances):
745
  """Creates a map from (node, volume) to instance name.
746

747
  @type instances: list of L{objects.Instance}
748
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
749

750
  """
751
  return dict(((node, vol), inst.name)
752
              for inst in instances
753
              for (node, vols) in inst.MapLVsByNode().items()
754
              for vol in vols)
755

    
756

    
757
def _RunPostHook(lu, node_name):
758
  """Runs the post-hook for an opcode on a single node.
759

760
  """
761
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
762
  try:
763
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
764
  except:
765
    # pylint: disable=W0702
766
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
767

    
768

    
769
def _CheckOutputFields(static, dynamic, selected):
770
  """Checks whether all selected fields are valid.
771

772
  @type static: L{utils.FieldSet}
773
  @param static: static fields set
774
  @type dynamic: L{utils.FieldSet}
775
  @param dynamic: dynamic fields set
776

777
  """
778
  f = utils.FieldSet()
779
  f.Extend(static)
780
  f.Extend(dynamic)
781

    
782
  delta = f.NonMatching(selected)
783
  if delta:
784
    raise errors.OpPrereqError("Unknown output fields selected: %s"
785
                               % ",".join(delta), errors.ECODE_INVAL)
786
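# Editor's note: a worked example of the check above, with hypothetical
# field names:
#
#   static = utils.FieldSet("name", "pinst_cnt")
#   dynamic = utils.FieldSet("free_memory")
#   _CheckOutputFields(static, dynamic, ["name", "bogus"])
#   # raises OpPrereqError("Unknown output fields selected: bogus")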

    
787

    
788
def _CheckGlobalHvParams(params):
789
  """Validates that given hypervisor params are not global ones.
790

791
  This will ensure that instances don't get customised versions of
792
  global params.
793

794
  """
795
  used_globals = constants.HVC_GLOBALS.intersection(params)
796
  if used_globals:
797
    msg = ("The following hypervisor parameters are global and cannot"
798
           " be customized at instance level, please modify them at"
799
           " cluster level: %s" % utils.CommaJoin(used_globals))
800
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
801

    
802

    
803
def _CheckNodeOnline(lu, node, msg=None):
804
  """Ensure that a given node is online.
805

806
  @param lu: the LU on behalf of which we make the check
807
  @param node: the node to check
808
  @param msg: if passed, should be a message to replace the default one
809
  @raise errors.OpPrereqError: if the node is offline
810

811
  """
812
  if msg is None:
813
    msg = "Can't use offline node"
814
  if lu.cfg.GetNodeInfo(node).offline:
815
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
816

    
817

    
818
def _CheckNodeNotDrained(lu, node):
819
  """Ensure that a given node is not drained.
820

821
  @param lu: the LU on behalf of which we make the check
822
  @param node: the node to check
823
  @raise errors.OpPrereqError: if the node is drained
824

825
  """
826
  if lu.cfg.GetNodeInfo(node).drained:
827
    raise errors.OpPrereqError("Can't use drained node %s" % node,
828
                               errors.ECODE_STATE)
829

    
830

    
831
def _CheckNodeVmCapable(lu, node):
832
  """Ensure that a given node is vm capable.
833

834
  @param lu: the LU on behalf of which we make the check
835
  @param node: the node to check
836
  @raise errors.OpPrereqError: if the node is not vm capable
837

838
  """
839
  if not lu.cfg.GetNodeInfo(node).vm_capable:
840
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
841
                               errors.ECODE_STATE)
842

    
843

    
844
def _CheckNodeHasOS(lu, node, os_name, force_variant):
845
  """Ensure that a node supports a given OS.
846

847
  @param lu: the LU on behalf of which we make the check
848
  @param node: the node to check
849
  @param os_name: the OS to query about
850
  @param force_variant: whether to ignore variant errors
851
  @raise errors.OpPrereqError: if the node does not support the OS
852

853
  """
854
  result = lu.rpc.call_os_get(node, os_name)
855
  result.Raise("OS '%s' not in supported OS list for node %s" %
856
               (os_name, node),
857
               prereq=True, ecode=errors.ECODE_INVAL)
858
  if not force_variant:
859
    _CheckOSVariant(result.payload, os_name)
860

    
861

    
862
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
863
  """Ensure that a node has the given secondary ip.
864

865
  @type lu: L{LogicalUnit}
866
  @param lu: the LU on behalf of which we make the check
867
  @type node: string
868
  @param node: the node to check
869
  @type secondary_ip: string
870
  @param secondary_ip: the ip to check
871
  @type prereq: boolean
872
  @param prereq: whether to throw a prerequisite or an execute error
873
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
874
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
875

876
  """
877
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
878
  result.Raise("Failure checking secondary ip on node %s" % node,
879
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
880
  if not result.payload:
881
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
882
           " please fix and re-run this command" % secondary_ip)
883
    if prereq:
884
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
885
    else:
886
      raise errors.OpExecError(msg)
887

    
888

    
889
def _GetClusterDomainSecret():
890
  """Reads the cluster domain secret.
891

892
  """
893
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
894
                               strict=True)
895

    
896

    
897
def _CheckInstanceDown(lu, instance, reason):
898
  """Ensure that an instance is not running."""
899
  if instance.admin_up:
900
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
901
                               (instance.name, reason), errors.ECODE_STATE)
902

    
903
  pnode = instance.primary_node
904
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
905
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
906
              prereq=True, ecode=errors.ECODE_ENVIRON)
907

    
908
  if instance.name in ins_l.payload:
909
    raise errors.OpPrereqError("Instance %s is running, %s" %
910
                               (instance.name, reason), errors.ECODE_STATE)
911

    
912

    
913
def _ExpandItemName(fn, name, kind):
914
  """Expand an item name.
915

916
  @param fn: the function to use for expansion
917
  @param name: requested item name
918
  @param kind: text description ('Node' or 'Instance')
919
  @return: the resolved (full) name
920
  @raise errors.OpPrereqError: if the item is not found
921

922
  """
923
  full_name = fn(name)
924
  if full_name is None:
925
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
926
                               errors.ECODE_NOENT)
927
  return full_name
928

    
929

    
930
def _ExpandNodeName(cfg, name):
931
  """Wrapper over L{_ExpandItemName} for nodes."""
932
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
933

    
934

    
935
def _ExpandInstanceName(cfg, name):
936
  """Wrapper over L{_ExpandItemName} for instance."""
937
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
938

    
939

    
940
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
941
                          memory, vcpus, nics, disk_template, disks,
942
                          bep, hvp, hypervisor_name, tags):
943
  """Builds instance related env variables for hooks
944

945
  This builds the hook environment from individual variables.
946

947
  @type name: string
948
  @param name: the name of the instance
949
  @type primary_node: string
950
  @param primary_node: the name of the instance's primary node
951
  @type secondary_nodes: list
952
  @param secondary_nodes: list of secondary nodes as strings
953
  @type os_type: string
954
  @param os_type: the name of the instance's OS
955
  @type status: boolean
956
  @param status: the should_run status of the instance
957
  @type memory: string
958
  @param memory: the memory size of the instance
959
  @type vcpus: string
960
  @param vcpus: the count of VCPUs the instance has
961
  @type nics: list
962
  @param nics: list of tuples (ip, mac, mode, link) representing
963
      the NICs the instance has
964
  @type disk_template: string
965
  @param disk_template: the disk template of the instance
966
  @type disks: list
967
  @param disks: the list of (size, mode) pairs
968
  @type bep: dict
969
  @param bep: the backend parameters for the instance
970
  @type hvp: dict
971
  @param hvp: the hypervisor parameters for the instance
972
  @type hypervisor_name: string
973
  @param hypervisor_name: the hypervisor for the instance
974
  @type tags: list
975
  @param tags: list of instance tags as strings
976
  @rtype: dict
977
  @return: the hook environment for this instance
978

979
  """
980
  if status:
981
    str_status = "up"
982
  else:
983
    str_status = "down"
984
  env = {
985
    "OP_TARGET": name,
986
    "INSTANCE_NAME": name,
987
    "INSTANCE_PRIMARY": primary_node,
988
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
989
    "INSTANCE_OS_TYPE": os_type,
990
    "INSTANCE_STATUS": str_status,
991
    "INSTANCE_MEMORY": memory,
992
    "INSTANCE_VCPUS": vcpus,
993
    "INSTANCE_DISK_TEMPLATE": disk_template,
994
    "INSTANCE_HYPERVISOR": hypervisor_name,
995
  }
996

    
997
  if nics:
998
    nic_count = len(nics)
999
    for idx, (ip, mac, mode, link) in enumerate(nics):
1000
      if ip is None:
1001
        ip = ""
1002
      env["INSTANCE_NIC%d_IP" % idx] = ip
1003
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1004
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1005
      env["INSTANCE_NIC%d_LINK" % idx] = link
1006
      if mode == constants.NIC_MODE_BRIDGED:
1007
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1008
  else:
1009
    nic_count = 0
1010

    
1011
  env["INSTANCE_NIC_COUNT"] = nic_count
1012

    
1013
  if disks:
1014
    disk_count = len(disks)
1015
    for idx, (size, mode) in enumerate(disks):
1016
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1017
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1018
  else:
1019
    disk_count = 0
1020

    
1021
  env["INSTANCE_DISK_COUNT"] = disk_count
1022

    
1023
  if not tags:
1024
    tags = []
1025

    
1026
  env["INSTANCE_TAGS"] = " ".join(tags)
1027

    
1028
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1029
    for key, value in source.items():
1030
      env["INSTANCE_%s_%s" % (kind, key)] = value
1031

    
1032
  return env
1033
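# Editor's note: a sketch of the environment produced above, with abridged,
# hypothetical arguments:
#
#   _BuildInstanceHookEnv("inst1.example.com", "node1", ["node2"], "debian",
#                         True, 512, 1,
#                         [("192.0.2.10", "aa:00:00:00:00:01", "bridged",
#                           "xen-br0")],
#                         "drbd", [(10240, "rw")], {}, {}, "xen-pvm", ["web"])
#   # -> {"OP_TARGET": "inst1.example.com", "INSTANCE_PRIMARY": "node1",
#   #     "INSTANCE_STATUS": "up", "INSTANCE_NIC_COUNT": 1,
#   #     "INSTANCE_NIC0_BRIDGE": "xen-br0", "INSTANCE_DISK0_SIZE": 10240, ...}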

    
1034

    
1035
def _NICListToTuple(lu, nics):
1036
  """Build a list of nic information tuples.
1037

1038
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1039
  value in LUInstanceQueryData.
1040

1041
  @type lu:  L{LogicalUnit}
1042
  @param lu: the logical unit on whose behalf we execute
1043
  @type nics: list of L{objects.NIC}
1044
  @param nics: list of nics to convert to hooks tuples
1045

1046
  """
1047
  hooks_nics = []
1048
  cluster = lu.cfg.GetClusterInfo()
1049
  for nic in nics:
1050
    ip = nic.ip
1051
    mac = nic.mac
1052
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1053
    mode = filled_params[constants.NIC_MODE]
1054
    link = filled_params[constants.NIC_LINK]
1055
    hooks_nics.append((ip, mac, mode, link))
1056
  return hooks_nics
1057

    
1058

    
1059
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1060
  """Builds instance related env variables for hooks from an object.
1061

1062
  @type lu: L{LogicalUnit}
1063
  @param lu: the logical unit on whose behalf we execute
1064
  @type instance: L{objects.Instance}
1065
  @param instance: the instance for which we should build the
1066
      environment
1067
  @type override: dict
1068
  @param override: dictionary with key/values that will override
1069
      our values
1070
  @rtype: dict
1071
  @return: the hook environment dictionary
1072

1073
  """
1074
  cluster = lu.cfg.GetClusterInfo()
1075
  bep = cluster.FillBE(instance)
1076
  hvp = cluster.FillHV(instance)
1077
  args = {
1078
    "name": instance.name,
1079
    "primary_node": instance.primary_node,
1080
    "secondary_nodes": instance.secondary_nodes,
1081
    "os_type": instance.os,
1082
    "status": instance.admin_up,
1083
    "memory": bep[constants.BE_MEMORY],
1084
    "vcpus": bep[constants.BE_VCPUS],
1085
    "nics": _NICListToTuple(lu, instance.nics),
1086
    "disk_template": instance.disk_template,
1087
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1088
    "bep": bep,
1089
    "hvp": hvp,
1090
    "hypervisor_name": instance.hypervisor,
1091
    "tags": instance.tags,
1092
  }
1093
  if override:
1094
    args.update(override)
1095
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1096

    
1097

    
1098
def _AdjustCandidatePool(lu, exceptions):
1099
  """Adjust the candidate pool after node operations.
1100

1101
  """
1102
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1103
  if mod_list:
1104
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1105
               utils.CommaJoin(node.name for node in mod_list))
1106
    for name in mod_list:
1107
      lu.context.ReaddNode(name)
1108
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1109
  if mc_now > mc_max:
1110
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1111
               (mc_now, mc_max))
1112

    
1113

    
1114
def _DecideSelfPromotion(lu, exceptions=None):
1115
  """Decide whether I should promote myself as a master candidate.
1116

1117
  """
1118
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1119
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1120
  # the new node will increase mc_max with one, so:
1121
  mc_should = min(mc_should + 1, cp_size)
1122
  return mc_now < mc_should
1123

    
1124

    
1125
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1126
  """Check that the brigdes needed by a list of nics exist.
1127

1128
  """
1129
  cluster = lu.cfg.GetClusterInfo()
1130
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1131
  brlist = [params[constants.NIC_LINK] for params in paramslist
1132
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1133
  if brlist:
1134
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1135
    result.Raise("Error checking bridges on destination node '%s'" %
1136
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1137

    
1138

    
1139
def _CheckInstanceBridgesExist(lu, instance, node=None):
1140
  """Check that the brigdes needed by an instance exist.
1141

1142
  """
1143
  if node is None:
1144
    node = instance.primary_node
1145
  _CheckNicsBridgesExist(lu, instance.nics, node)
1146

    
1147

    
1148
def _CheckOSVariant(os_obj, name):
1149
  """Check whether an OS name conforms to the os variants specification.
1150

1151
  @type os_obj: L{objects.OS}
1152
  @param os_obj: OS object to check
1153
  @type name: string
1154
  @param name: OS name passed by the user, to check for validity
1155

1156
  """
1157
  variant = objects.OS.GetVariant(name)
1158
  if not os_obj.supported_variants:
1159
    if variant:
1160
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1161
                                 " passed)" % (os_obj.name, variant),
1162
                                 errors.ECODE_INVAL)
1163
    return
1164
  if not variant:
1165
    raise errors.OpPrereqError("OS name must include a variant",
1166
                               errors.ECODE_INVAL)
1167

    
1168
  if variant not in os_obj.supported_variants:
1169
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1170
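# Editor's note (illustrative): OS variants are encoded as "<os>+<variant>".
# With an OS object whose supported_variants is ["default", "minimal"], a
# name like "debootstrap+minimal" passes the check above, while a bare
# "debootstrap" raises "OS name must include a variant".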

    
1171

    
1172
def _GetNodeInstancesInner(cfg, fn):
1173
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1174

    
1175

    
1176
def _GetNodeInstances(cfg, node_name):
1177
  """Returns a list of all primary and secondary instances on a node.
1178

1179
  """
1180

    
1181
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1182

    
1183

    
1184
def _GetNodePrimaryInstances(cfg, node_name):
1185
  """Returns primary instances on a node.
1186

1187
  """
1188
  return _GetNodeInstancesInner(cfg,
1189
                                lambda inst: node_name == inst.primary_node)
1190

    
1191

    
1192
def _GetNodeSecondaryInstances(cfg, node_name):
1193
  """Returns secondary instances on a node.
1194

1195
  """
1196
  return _GetNodeInstancesInner(cfg,
1197
                                lambda inst: node_name in inst.secondary_nodes)
1198

    
1199

    
1200
def _GetStorageTypeArgs(cfg, storage_type):
1201
  """Returns the arguments for a storage type.
1202

1203
  """
1204
  # Special case for file storage
1205
  if storage_type == constants.ST_FILE:
1206
    # storage.FileStorage wants a list of storage directories
1207
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1208

    
1209
  return []
1210

    
1211

    
1212
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1213
  faulty = []
1214

    
1215
  for dev in instance.disks:
1216
    cfg.SetDiskID(dev, node_name)
1217

    
1218
  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1219
  result.Raise("Failed to get disk status from node %s" % node_name,
1220
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1221

    
1222
  for idx, bdev_status in enumerate(result.payload):
1223
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1224
      faulty.append(idx)
1225

    
1226
  return faulty
1227

    
1228

    
1229
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1230
  """Check the sanity of iallocator and node arguments and use the
1231
  cluster-wide iallocator if appropriate.
1232

1233
  Check that at most one of (iallocator, node) is specified. If none is
1234
  specified, then the LU's opcode's iallocator slot is filled with the
1235
  cluster-wide default iallocator.
1236

1237
  @type iallocator_slot: string
1238
  @param iallocator_slot: the name of the opcode iallocator slot
1239
  @type node_slot: string
1240
  @param node_slot: the name of the opcode target node slot
1241

1242
  """
1243
  node = getattr(lu.op, node_slot, None)
1244
  iallocator = getattr(lu.op, iallocator_slot, None)
1245

    
1246
  if node is not None and iallocator is not None:
1247
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1248
                               errors.ECODE_INVAL)
1249
  elif node is None and iallocator is None:
1250
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1251
    if default_iallocator:
1252
      setattr(lu.op, iallocator_slot, default_iallocator)
1253
    else:
1254
      raise errors.OpPrereqError("No iallocator or node given and no"
1255
                                 " cluster-wide default iallocator found;"
1256
                                 " please specify either an iallocator or a"
1257
                                 " node, or set a cluster-wide default"
1258
                                 " iallocator")
1259
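# Editor's note: an illustrative call to the check above from an LU whose
# opcode carries "iallocator" and "pnode" slots (slot names are examples):
#
#   def CheckArguments(self):
#     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
#
# After this, self.op.iallocator holds the cluster-wide default if neither
# an iallocator nor a node was given.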

    
1260

    
1261
def _GetDefaultIAllocator(cfg, iallocator):
1262
  """Decides on which iallocator to use.
1263

1264
  @type cfg: L{config.ConfigWriter}
1265
  @param cfg: Cluster configuration object
1266
  @type iallocator: string or None
1267
  @param iallocator: Iallocator specified in opcode
1268
  @rtype: string
1269
  @return: Iallocator name
1270

1271
  """
1272
  if not iallocator:
1273
    # Use default iallocator
1274
    iallocator = cfg.GetDefaultIAllocator()
1275

    
1276
  if not iallocator:
1277
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1278
                               " opcode nor as a cluster-wide default",
1279
                               errors.ECODE_INVAL)
1280

    
1281
  return iallocator
1282

    
1283

    
1284
class LUClusterPostInit(LogicalUnit):
1285
  """Logical unit for running hooks after cluster initialization.
1286

1287
  """
1288
  HPATH = "cluster-init"
1289
  HTYPE = constants.HTYPE_CLUSTER
1290

    
1291
  def BuildHooksEnv(self):
1292
    """Build hooks env.
1293

1294
    """
1295
    return {
1296
      "OP_TARGET": self.cfg.GetClusterName(),
1297
      }
1298

    
1299
  def BuildHooksNodes(self):
1300
    """Build hooks nodes.
1301

1302
    """
1303
    return ([], [self.cfg.GetMasterNode()])
1304

    
1305
  def Exec(self, feedback_fn):
1306
    """Nothing to do.
1307

1308
    """
1309
    return True
1310

    
1311

    
1312
class LUClusterDestroy(LogicalUnit):
1313
  """Logical unit for destroying the cluster.
1314

1315
  """
1316
  HPATH = "cluster-destroy"
1317
  HTYPE = constants.HTYPE_CLUSTER
1318

    
1319
  def BuildHooksEnv(self):
1320
    """Build hooks env.
1321

1322
    """
1323
    return {
1324
      "OP_TARGET": self.cfg.GetClusterName(),
1325
      }
1326

    
1327
  def BuildHooksNodes(self):
1328
    """Build hooks nodes.
1329

1330
    """
1331
    return ([], [])
1332

    
1333
  def CheckPrereq(self):
1334
    """Check prerequisites.
1335

1336
    This checks whether the cluster is empty.
1337

1338
    Any errors are signaled by raising errors.OpPrereqError.
1339

1340
    """
1341
    master = self.cfg.GetMasterNode()
1342

    
1343
    nodelist = self.cfg.GetNodeList()
1344
    if len(nodelist) != 1 or nodelist[0] != master:
1345
      raise errors.OpPrereqError("There are still %d node(s) in"
1346
                                 " this cluster." % (len(nodelist) - 1),
1347
                                 errors.ECODE_INVAL)
1348
    instancelist = self.cfg.GetInstanceList()
1349
    if instancelist:
1350
      raise errors.OpPrereqError("There are still %d instance(s) in"
1351
                                 " this cluster." % len(instancelist),
1352
                                 errors.ECODE_INVAL)
1353

    
1354
  def Exec(self, feedback_fn):
1355
    """Destroys the cluster.
1356

1357
    """
1358
    master = self.cfg.GetMasterNode()
1359

    
1360
    # Run post hooks on master node before it's removed
1361
    _RunPostHook(self, master)
1362

    
1363
    result = self.rpc.call_node_deactivate_master_ip(master)
1364
    result.Raise("Could not disable the master role")
1365

    
1366
    return master
1367

    
1368

    
1369
def _VerifyCertificate(filename):
1370
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1371

1372
  @type filename: string
1373
  @param filename: Path to PEM file
1374

1375
  """
1376
  try:
1377
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1378
                                           utils.ReadFile(filename))
1379
  except Exception, err: # pylint: disable=W0703
1380
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1381
            "Failed to load X509 certificate %s: %s" % (filename, err))
1382

    
1383
  (errcode, msg) = \
1384
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1385
                                constants.SSL_CERT_EXPIRATION_ERROR)
1386

    
1387
  if msg:
1388
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1389
  else:
1390
    fnamemsg = None
1391

    
1392
  if errcode is None:
1393
    return (None, fnamemsg)
1394
  elif errcode == utils.CERT_WARNING:
1395
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1396
  elif errcode == utils.CERT_ERROR:
1397
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1398

    
1399
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1400

    
1401

    
1402
def _GetAllHypervisorParameters(cluster, instances):
1403
  """Compute the set of all hypervisor parameters.
1404

1405
  @type cluster: L{objects.Cluster}
1406
  @param cluster: the cluster object
1407
  @type instances: list of L{objects.Instance}
1408
  @param instances: additional instances from which to obtain parameters
1409
  @rtype: list of (origin, hypervisor, parameters)
1410
  @return: a list with all parameters found, indicating the hypervisor they
1411
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1412

1413
  """
1414
  hvp_data = []
1415

    
1416
  for hv_name in cluster.enabled_hypervisors:
1417
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1418

    
1419
  for os_name, os_hvp in cluster.os_hvp.items():
1420
    for hv_name, hv_params in os_hvp.items():
1421
      if hv_params:
1422
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1423
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1424

    
1425
  # TODO: collapse identical parameter values in a single one
1426
  for instance in instances:
1427
    if instance.hvparams:
1428
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1429
                       cluster.FillHV(instance)))
1430

    
1431
  return hvp_data
1432

    
1433

    
1434
class _VerifyErrors(object):
1435
  """Mix-in for cluster/group verify LUs.
1436

1437
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1438
  self.op and self._feedback_fn to be available.)
1439

1440
  """
1441

    
1442
  ETYPE_FIELD = "code"
1443
  ETYPE_ERROR = "ERROR"
1444
  ETYPE_WARNING = "WARNING"
1445

    
1446
  def _Error(self, ecode, item, msg, *args, **kwargs):
1447
    """Format an error message.
1448

1449
    Based on the opcode's error_codes parameter, either format a
1450
    parseable error code, or a simpler error string.
1451

1452
    This must be called only from Exec and functions called from Exec.
1453

1454
    """
1455
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1456
    itype, etxt, _ = ecode
1457
    # first complete the msg
1458
    if args:
1459
      msg = msg % args
1460
    # then format the whole message
1461
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1462
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1463
    else:
1464
      if item:
1465
        item = " " + item
1466
      else:
1467
        item = ""
1468
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1469
    # and finally report it via the feedback_fn
1470
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1471

    
1472
  def _ErrorIf(self, cond, ecode, *args, **kwargs):
1473
    """Log an error message if the passed condition is True.
1474

1475
    """
1476
    cond = (bool(cond)
1477
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1478

    
1479
    # If the error code is in the list of ignored errors, demote the error to a
1480
    # warning
1481
    (_, etxt, _) = ecode
1482
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1483
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1484

    
1485
    if cond:
1486
      self._Error(ecode, *args, **kwargs)
1487

    
1488
    # do not mark the operation as failed for WARN cases only
1489
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1490
      self.bad = self.bad or cond
1491
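# Editor's note: an illustrative use of the mix-in above from a verify LU's
# Exec (the error code, item and message below are hypothetical):
#
#   self._ErrorIf(bool(nresult.fail_msg), constants.CV_ENODERPC, node,
#                 "error while contacting node: %s", nresult.fail_msg)
#
# Codes listed in self.op.ignore_errors are demoted to warnings and do not
# mark the operation as failed.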

    
1492

    
1493
class LUClusterVerify(NoHooksLU):
1494
  """Submits all jobs necessary to verify the cluster.
1495

1496
  """
1497
  REQ_BGL = False
1498

    
1499
  def ExpandNames(self):
1500
    self.needed_locks = {}
1501

    
1502
  def Exec(self, feedback_fn):
1503
    jobs = []
1504

    
1505
    if self.op.group_name:
1506
      groups = [self.op.group_name]
1507
      depends_fn = lambda: None
1508
    else:
1509
      groups = self.cfg.GetNodeGroupList()
1510

    
1511
      # Verify global configuration
1512
      jobs.append([
1513
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1514
        ])
1515

    
1516
      # Always depend on global verification
1517
      depends_fn = lambda: [(-len(jobs), [])]
1518

    
1519
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1520
                                            ignore_errors=self.op.ignore_errors,
1521
                                            depends=depends_fn())]
1522
                for group in groups)
1523

    
1524
    # Fix up all parameters
1525
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1526
      op.debug_simulate_errors = self.op.debug_simulate_errors
1527
      op.verbose = self.op.verbose
1528
      op.error_codes = self.op.error_codes
1529
      try:
1530
        op.skip_checks = self.op.skip_checks
1531
      except AttributeError:
1532
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1533

    
1534
    return ResultWithJobs(jobs)
1535

    
1536

    
1537
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1538
  """Verifies the cluster config.
1539

1540
  """
1541
  REQ_BGL = True
1542

    
1543
  def _VerifyHVP(self, hvp_data):
1544
    """Verifies locally the syntax of the hypervisor parameters.
1545

1546
    """
1547
    for item, hv_name, hv_params in hvp_data:
1548
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1549
             (hv_name, item))
1550
      try:
1551
        hv_class = hypervisor.GetHypervisor(hv_name)
1552
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1553
        hv_class.CheckParameterSyntax(hv_params)
1554
      except errors.GenericError, err:
1555
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1556

    
1557
  def ExpandNames(self):
1558
    # Information can be safely retrieved as the BGL is acquired in exclusive
1559
    # mode
1560
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1561
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1562
    self.all_node_info = self.cfg.GetAllNodesInfo()
1563
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1564
    self.needed_locks = {}
1565

    
1566
  def Exec(self, feedback_fn):
1567
    """Verify integrity of cluster, performing various test on nodes.
1568

1569
    """
1570
    self.bad = False
1571
    self._feedback_fn = feedback_fn
1572

    
1573
    feedback_fn("* Verifying cluster config")
1574

    
1575
    for msg in self.cfg.VerifyConfig():
1576
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1577

    
1578
    feedback_fn("* Verifying cluster certificate files")
1579

    
1580
    for cert_filename in constants.ALL_CERT_FILES:
1581
      (errcode, msg) = _VerifyCertificate(cert_filename)
1582
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1583

    
1584
    feedback_fn("* Verifying hypervisor parameters")
1585

    
1586
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1587
                                                self.all_inst_info.values()))
1588

    
1589
    feedback_fn("* Verifying all nodes belong to an existing group")
1590

    
1591
    # We do this verification here because, should this bogus circumstance
1592
    # occur, it would never be caught by VerifyGroup, which only acts on
1593
    # nodes/instances reachable from existing node groups.
1594

    
1595
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1596
                         if node.group not in self.all_group_info)
1597

    
1598
    dangling_instances = {}
1599
    no_node_instances = []
1600

    
1601
    for inst in self.all_inst_info.values():
1602
      if inst.primary_node in dangling_nodes:
1603
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1604
      elif inst.primary_node not in self.all_node_info:
1605
        no_node_instances.append(inst.name)
1606

    
1607
    pretty_dangling = [
1608
        "%s (%s)" %
1609
        (node.name,
1610
         utils.CommaJoin(dangling_instances.get(node.name,
1611
                                                ["no instances"])))
1612
        for node in dangling_nodes]
1613

    
1614
    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1615
                  None,
1616
                  "the following nodes (and their instances) belong to a non"
1617
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1618

    
1619
    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1620
                  None,
1621
                  "the following instances have a non-existing primary-node:"
1622
                  " %s", utils.CommaJoin(no_node_instances))
1623

    
1624
    return not self.bad
1625

    
1626

    
1627
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.
1629

1630
  """
1631
  HPATH = "cluster-verify"
1632
  HTYPE = constants.HTYPE_CLUSTER
1633
  REQ_BGL = False
1634

    
1635
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1636

    
1637
  class NodeImage(object):
1638
    """A class representing the logical and physical status of a node.
1639

1640
    @type name: string
1641
    @ivar name: the node name to which this object refers
1642
    @ivar volumes: a structure as returned from
1643
        L{ganeti.backend.GetVolumeList} (runtime)
1644
    @ivar instances: a list of running instances (runtime)
1645
    @ivar pinst: list of configured primary instances (config)
1646
    @ivar sinst: list of configured secondary instances (config)
1647
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1648
        instances for which this node is secondary (config)
1649
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1650
    @ivar dfree: free disk, as reported by the node (runtime)
1651
    @ivar offline: the offline status (config)
1652
    @type rpc_fail: boolean
1653
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1654
        not whether the individual keys were correct) (runtime)
1655
    @type lvm_fail: boolean
1656
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1657
    @type hyp_fail: boolean
1658
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1659
    @type ghost: boolean
1660
    @ivar ghost: whether this is a known node or not (config)
1661
    @type os_fail: boolean
1662
    @ivar os_fail: whether the RPC call didn't return valid OS data
1663
    @type oslist: list
1664
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1665
    @type vm_capable: boolean
1666
    @ivar vm_capable: whether the node can host instances
1667

1668
    """
1669
    def __init__(self, offline=False, name=None, vm_capable=True):
1670
      self.name = name
1671
      self.volumes = {}
1672
      self.instances = []
1673
      self.pinst = []
1674
      self.sinst = []
1675
      self.sbp = {}
1676
      self.mfree = 0
1677
      self.dfree = 0
1678
      self.offline = offline
1679
      self.vm_capable = vm_capable
1680
      self.rpc_fail = False
1681
      self.lvm_fail = False
1682
      self.hyp_fail = False
1683
      self.ghost = False
1684
      self.os_fail = False
1685
      self.oslist = {}
1686

    
1687
  def ExpandNames(self):
1688
    # This raises errors.OpPrereqError on its own:
1689
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1690

    
1691
    # Get instances in node group; this is unsafe and needs verification later
1692
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1693

    
1694
    self.needed_locks = {
1695
      locking.LEVEL_INSTANCE: inst_names,
1696
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1697
      locking.LEVEL_NODE: [],
1698
      }
1699

    
1700
    self.share_locks = _ShareAll()
1701

    
1702
  def DeclareLocks(self, level):
1703
    if level == locking.LEVEL_NODE:
1704
      # Get members of node group; this is unsafe and needs verification later
1705
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1706

    
1707
      all_inst_info = self.cfg.GetAllInstancesInfo()
1708

    
1709
      # In Exec(), we warn about mirrored instances that have primary and
1710
      # secondary living in separate node groups. To fully verify that
1711
      # volumes for these instances are healthy, we will need to do an
1712
      # extra call to their secondaries. We ensure here those nodes will
1713
      # be locked.
1714
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1715
        # Important: access only the instances whose lock is owned
1716
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1717
          nodes.update(all_inst_info[inst].secondary_nodes)
1718

    
1719
      self.needed_locks[locking.LEVEL_NODE] = nodes
1720

    
1721
  def CheckPrereq(self):
1722
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1723
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1724

    
1725
    group_nodes = set(self.group_info.members)
1726
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1727

    
1728
    unlocked_nodes = \
1729
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1730

    
1731
    unlocked_instances = \
1732
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1733

    
1734
    if unlocked_nodes:
1735
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1736
                                 utils.CommaJoin(unlocked_nodes))
1737

    
1738
    if unlocked_instances:
1739
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1740
                                 utils.CommaJoin(unlocked_instances))
1741

    
1742
    self.all_node_info = self.cfg.GetAllNodesInfo()
1743
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1744

    
1745
    self.my_node_names = utils.NiceSort(group_nodes)
1746
    self.my_inst_names = utils.NiceSort(group_instances)
1747

    
1748
    self.my_node_info = dict((name, self.all_node_info[name])
1749
                             for name in self.my_node_names)
1750

    
1751
    self.my_inst_info = dict((name, self.all_inst_info[name])
1752
                             for name in self.my_inst_names)
1753

    
1754
    # We detect here the nodes that will need the extra RPC calls for verifying
1755
    # split LV volumes; they should be locked.
1756
    extra_lv_nodes = set()
1757

    
1758
    for inst in self.my_inst_info.values():
1759
      if inst.disk_template in constants.DTS_INT_MIRROR:
1760
        group = self.my_node_info[inst.primary_node].group
1761
        for nname in inst.secondary_nodes:
1762
          if self.all_node_info[nname].group != group:
1763
            extra_lv_nodes.add(nname)
1764

    
1765
    unlocked_lv_nodes = \
1766
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1767

    
1768
    if unlocked_lv_nodes:
1769
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1770
                                 utils.CommaJoin(unlocked_lv_nodes))
1771
    self.extra_lv_nodes = list(extra_lv_nodes)
1772

    
1773
  def _VerifyNode(self, ninfo, nresult):
1774
    """Perform some basic validation on data returned from a node.
1775

1776
      - check the result data structure is well formed and has all the
1777
        mandatory fields
1778
      - check ganeti version
1779

1780
    @type ninfo: L{objects.Node}
1781
    @param ninfo: the node to check
1782
    @param nresult: the results from the node
1783
    @rtype: boolean
1784
    @return: whether overall this call was successful (and we can expect
1785
         reasonable values in the response)
1786

1787
    """
1788
    node = ninfo.name
1789
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1790

    
1791
    # main result, nresult should be a non-empty dict
1792
    test = not nresult or not isinstance(nresult, dict)
1793
    _ErrorIf(test, constants.CV_ENODERPC, node,
1794
                  "unable to verify node: no data returned")
1795
    if test:
1796
      return False
1797

    
1798
    # compares ganeti version
1799
    local_version = constants.PROTOCOL_VERSION
1800
    remote_version = nresult.get("version", None)
1801
    test = not (remote_version and
1802
                isinstance(remote_version, (list, tuple)) and
1803
                len(remote_version) == 2)
1804
    _ErrorIf(test, constants.CV_ENODERPC, node,
1805
             "connection to node returned invalid data")
1806
    if test:
1807
      return False
1808

    
1809
    test = local_version != remote_version[0]
1810
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
1811
             "incompatible protocol versions: master %s,"
1812
             " node %s", local_version, remote_version[0])
1813
    if test:
1814
      return False
1815

    
1816
    # node seems compatible, we can actually try to look into its results
1817

    
1818
    # full package version
1819
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1820
                  constants.CV_ENODEVERSION, node,
1821
                  "software version mismatch: master %s, node %s",
1822
                  constants.RELEASE_VERSION, remote_version[1],
1823
                  code=self.ETYPE_WARNING)
1824

    
1825
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1826
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1827
      for hv_name, hv_result in hyp_result.iteritems():
1828
        test = hv_result is not None
1829
        _ErrorIf(test, constants.CV_ENODEHV, node,
1830
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1831

    
1832
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1833
    if ninfo.vm_capable and isinstance(hvp_result, list):
1834
      for item, hv_name, hv_result in hvp_result:
1835
        _ErrorIf(True, constants.CV_ENODEHV, node,
1836
                 "hypervisor %s parameter verify failure (source %s): %s",
1837
                 hv_name, item, hv_result)
1838

    
1839
    test = nresult.get(constants.NV_NODESETUP,
1840
                       ["Missing NODESETUP results"])
1841
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1842
             "; ".join(test))
1843

    
1844
    return True
1845

    
1846
  def _VerifyNodeTime(self, ninfo, nresult,
1847
                      nvinfo_starttime, nvinfo_endtime):
1848
    """Check the node time.
1849

1850
    @type ninfo: L{objects.Node}
1851
    @param ninfo: the node to check
1852
    @param nresult: the remote results for the node
1853
    @param nvinfo_starttime: the start time of the RPC call
1854
    @param nvinfo_endtime: the end time of the RPC call
1855

1856
    """
1857
    node = ninfo.name
1858
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1859

    
1860
    ntime = nresult.get(constants.NV_TIME, None)
1861
    try:
1862
      ntime_merged = utils.MergeTime(ntime)
1863
    except (ValueError, TypeError):
1864
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1865
      return
1866

    
1867
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1868
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1869
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1870
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1871
    else:
1872
      ntime_diff = None
1873

    
1874
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1875
             "Node time diverges by at least %s from master node time",
1876
             ntime_diff)
1877

    
1878
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1879
    """Check the node LVM results.
1880

1881
    @type ninfo: L{objects.Node}
1882
    @param ninfo: the node to check
1883
    @param nresult: the remote results for the node
1884
    @param vg_name: the configured VG name
1885

1886
    """
1887
    if vg_name is None:
1888
      return
1889

    
1890
    node = ninfo.name
1891
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1892

    
1893
    # checks vg existence and size > 20G
1894
    vglist = nresult.get(constants.NV_VGLIST, None)
1895
    test = not vglist
1896
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1897
    if not test:
1898
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1899
                                            constants.MIN_VG_SIZE)
1900
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1901

    
1902
    # check pv names
1903
    pvlist = nresult.get(constants.NV_PVLIST, None)
1904
    test = pvlist is None
1905
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1906
    if not test:
1907
      # check that ':' is not present in PV names, since it's a
1908
      # special character for lvcreate (denotes the range of PEs to
1909
      # use on the PV)
1910
      for _, pvname, owner_vg in pvlist:
1911
        test = ":" in pvname
1912
        _ErrorIf(test, constants.CV_ENODELVM, node,
1913
                 "Invalid character ':' in PV '%s' of VG '%s'",
1914
                 pvname, owner_vg)
1915

    
1916
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1917
    """Check the node bridges.
1918

1919
    @type ninfo: L{objects.Node}
1920
    @param ninfo: the node to check
1921
    @param nresult: the remote results for the node
1922
    @param bridges: the expected list of bridges
1923

1924
    """
1925
    if not bridges:
1926
      return
1927

    
1928
    node = ninfo.name
1929
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1930

    
1931
    missing = nresult.get(constants.NV_BRIDGES, None)
1932
    test = not isinstance(missing, list)
1933
    _ErrorIf(test, constants.CV_ENODENET, node,
1934
             "did not return valid bridge information")
1935
    if not test:
1936
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1937
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1938

    
1939
  def _VerifyNodeNetwork(self, ninfo, nresult):
1940
    """Check the node network connectivity results.
1941

1942
    @type ninfo: L{objects.Node}
1943
    @param ninfo: the node to check
1944
    @param nresult: the remote results for the node
1945

1946
    """
1947
    node = ninfo.name
1948
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1949

    
1950
    test = constants.NV_NODELIST not in nresult
1951
    _ErrorIf(test, constants.CV_ENODESSH, node,
1952
             "node hasn't returned node ssh connectivity data")
1953
    if not test:
1954
      if nresult[constants.NV_NODELIST]:
1955
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1956
          _ErrorIf(True, constants.CV_ENODESSH, node,
1957
                   "ssh communication with node '%s': %s", a_node, a_msg)
1958

    
1959
    test = constants.NV_NODENETTEST not in nresult
1960
    _ErrorIf(test, constants.CV_ENODENET, node,
1961
             "node hasn't returned node tcp connectivity data")
1962
    if not test:
1963
      if nresult[constants.NV_NODENETTEST]:
1964
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1965
        for anode in nlist:
1966
          _ErrorIf(True, constants.CV_ENODENET, node,
1967
                   "tcp communication with node '%s': %s",
1968
                   anode, nresult[constants.NV_NODENETTEST][anode])
1969

    
1970
    test = constants.NV_MASTERIP not in nresult
1971
    _ErrorIf(test, constants.CV_ENODENET, node,
1972
             "node hasn't returned node master IP reachability data")
1973
    if not test:
1974
      if not nresult[constants.NV_MASTERIP]:
1975
        if node == self.master_node:
1976
          msg = "the master node cannot reach the master IP (not configured?)"
1977
        else:
1978
          msg = "cannot reach the master IP"
1979
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
1980

    
1981
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1982
                      diskstatus):
1983
    """Verify an instance.
1984

1985
    This function checks to see if the required block devices are
1986
    available on the instance's node.
1987

1988
    """
1989
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1990
    node_current = instanceconfig.primary_node
1991

    
1992
    node_vol_should = {}
1993
    instanceconfig.MapLVsByNode(node_vol_should)
1994

    
1995
    for node in node_vol_should:
1996
      n_img = node_image[node]
1997
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1998
        # ignore missing volumes on offline or broken nodes
1999
        continue
2000
      for volume in node_vol_should[node]:
2001
        test = volume not in n_img.volumes
2002
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2003
                 "volume %s missing on node %s", volume, node)
2004

    
2005
    if instanceconfig.admin_up:
2006
      pri_img = node_image[node_current]
2007
      test = instance not in pri_img.instances and not pri_img.offline
2008
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2009
               "instance not running on its primary node %s",
2010
               node_current)
2011

    
2012
    diskdata = [(nname, success, status, idx)
2013
                for (nname, disks) in diskstatus.items()
2014
                for idx, (success, status) in enumerate(disks)]
2015

    
2016
    for nname, success, bdev_status, idx in diskdata:
2017
      # the 'ghost node' construction in Exec() ensures that we have a
2018
      # node here
2019
      snode = node_image[nname]
2020
      bad_snode = snode.ghost or snode.offline
2021
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2022
               constants.CV_EINSTANCEFAULTYDISK, instance,
2023
               "couldn't retrieve status for disk/%s on %s: %s",
2024
               idx, nname, bdev_status)
2025
      _ErrorIf((instanceconfig.admin_up and success and
2026
                bdev_status.ldisk_status == constants.LDS_FAULTY),
2027
               constants.CV_EINSTANCEFAULTYDISK, instance,
2028
               "disk/%s on %s is faulty", idx, nname)
2029

    
2030
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2031
    """Verify if there are any unknown volumes in the cluster.
2032

2033
    The .os, .swap and backup volumes are ignored. All other volumes are
2034
    reported as unknown.
2035

2036
    @type reserved: L{ganeti.utils.FieldSet}
2037
    @param reserved: a FieldSet of reserved volume names
2038

2039
    """
2040
    for node, n_img in node_image.items():
2041
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2042
        # skip non-healthy nodes
2043
        continue
2044
      for volume in n_img.volumes:
2045
        test = ((node not in node_vol_should or
2046
                volume not in node_vol_should[node]) and
2047
                not reserved.Matches(volume))
2048
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2049
                      "volume %s is unknown", volume)
2050

    
2051
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2052
    """Verify N+1 Memory Resilience.
2053

2054
    Check that if one single node dies we can still start all the
2055
    instances it was primary for.
2056

2057
    """
2058
    cluster_info = self.cfg.GetClusterInfo()
2059
    for node, n_img in node_image.items():
2060
      # This code checks that every node which is now listed as
2061
      # secondary has enough memory to host all instances it is
2062
      # supposed to should a single other node in the cluster fail.
2063
      # FIXME: not ready for failover to an arbitrary node
2064
      # FIXME: does not support file-backed instances
2065
      # WARNING: we currently take into account down instances as well
2066
      # as up ones, considering that even if they're down someone
2067
      # might want to start them even in the event of a node failure.
2068
      if n_img.offline:
2069
        # we're skipping offline nodes from the N+1 warning, since
2070
        # most likely we don't have good memory information from them;
2071
        # we already list instances living on such nodes, and that's
2072
        # enough warning
2073
        continue
2074
      for prinode, instances in n_img.sbp.items():
2075
        needed_mem = 0
2076
        for instance in instances:
2077
          bep = cluster_info.FillBE(instance_cfg[instance])
2078
          if bep[constants.BE_AUTO_BALANCE]:
2079
            needed_mem += bep[constants.BE_MEMORY]
2080
        test = n_img.mfree < needed_mem
2081
        self._ErrorIf(test, constants.CV_ENODEN1, node,
2082
                      "not enough memory to accomodate instance failovers"
2083
                      " should node %s fail (%dMiB needed, %dMiB available)",
2084
                      prinode, needed_mem, n_img.mfree)
2085

    
2086
  @classmethod
2087
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2088
                   (files_all, files_opt, files_mc, files_vm)):
2089
    """Verifies file checksums collected from all nodes.
2090

2091
    @param errorif: Callback for reporting errors
2092
    @param nodeinfo: List of L{objects.Node} objects
2093
    @param master_node: Name of master node
2094
    @param all_nvinfo: RPC results
2095

2096
    """
2097
    # Define functions determining which nodes to consider for a file
2098
    files2nodefn = [
2099
      (files_all, None),
2100
      (files_mc, lambda node: (node.master_candidate or
2101
                               node.name == master_node)),
2102
      (files_vm, lambda node: node.vm_capable),
2103
      ]
2104

    
2105
    # Build mapping from filename to list of nodes which should have the file
2106
    nodefiles = {}
2107
    for (files, fn) in files2nodefn:
2108
      if fn is None:
2109
        filenodes = nodeinfo
2110
      else:
2111
        filenodes = filter(fn, nodeinfo)
2112
      nodefiles.update((filename,
2113
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2114
                       for filename in files)
2115

    
2116
    assert set(nodefiles) == (files_all | files_mc | files_vm)
2117

    
2118
    fileinfo = dict((filename, {}) for filename in nodefiles)
2119
    ignore_nodes = set()
2120

    
2121
    for node in nodeinfo:
2122
      if node.offline:
2123
        ignore_nodes.add(node.name)
2124
        continue
2125

    
2126
      nresult = all_nvinfo[node.name]
2127

    
2128
      if nresult.fail_msg or not nresult.payload:
2129
        node_files = None
2130
      else:
2131
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2132

    
2133
      test = not (node_files and isinstance(node_files, dict))
2134
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
2135
              "Node did not return file checksum data")
2136
      if test:
2137
        ignore_nodes.add(node.name)
2138
        continue
2139

    
2140
      # Build per-checksum mapping from filename to nodes having it
2141
      for (filename, checksum) in node_files.items():
2142
        assert filename in nodefiles
2143
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2144

    
2145
    for (filename, checksums) in fileinfo.items():
2146
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2147

    
2148
      # Nodes having the file
2149
      with_file = frozenset(node_name
2150
                            for nodes in fileinfo[filename].values()
2151
                            for node_name in nodes) - ignore_nodes
2152

    
2153
      expected_nodes = nodefiles[filename] - ignore_nodes
2154

    
2155
      # Nodes missing file
2156
      missing_file = expected_nodes - with_file
2157

    
2158
      if filename in files_opt:
2159
        # All or no nodes
2160
        errorif(missing_file and missing_file != expected_nodes,
2161
                constants.CV_ECLUSTERFILECHECK, None,
2162
                "File %s is optional, but it must exist on all or no"
2163
                " nodes (not found on %s)",
2164
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2165
      else:
2166
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2167
                "File %s is missing from node(s) %s", filename,
2168
                utils.CommaJoin(utils.NiceSort(missing_file)))
2169

    
2170
        # Warn if a node has a file it shouldn't
2171
        unexpected = with_file - expected_nodes
2172
        errorif(unexpected,
2173
                constants.CV_ECLUSTERFILECHECK, None,
2174
                "File %s should not exist on node(s) %s",
2175
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2176

    
2177
      # See if there are multiple versions of the file
2178
      test = len(checksums) > 1
2179
      if test:
2180
        variants = ["variant %s on %s" %
2181
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2182
                    for (idx, (checksum, nodes)) in
2183
                      enumerate(sorted(checksums.items()))]
2184
      else:
2185
        variants = []
2186

    
2187
      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2188
              "File %s found with %s different checksums (%s)",
2189
              filename, len(checksums), "; ".join(variants))
2190

    
2191
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2192
                      drbd_map):
2193
    """Verifies and the node DRBD status.
2194

2195
    @type ninfo: L{objects.Node}
2196
    @param ninfo: the node to check
2197
    @param nresult: the remote results for the node
2198
    @param instanceinfo: the dict of instances
2199
    @param drbd_helper: the configured DRBD usermode helper
2200
    @param drbd_map: the DRBD map as returned by
2201
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2202

2203
    """
2204
    node = ninfo.name
2205
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2206

    
2207
    if drbd_helper:
2208
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2209
      test = (helper_result == None)
2210
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2211
               "no drbd usermode helper returned")
2212
      if helper_result:
2213
        status, payload = helper_result
2214
        test = not status
2215
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2216
                 "drbd usermode helper check unsuccessful: %s", payload)
2217
        test = status and (payload != drbd_helper)
2218
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2219
                 "wrong drbd usermode helper: %s", payload)
2220

    
2221
    # compute the DRBD minors
2222
    node_drbd = {}
2223
    for minor, instance in drbd_map[node].items():
2224
      test = instance not in instanceinfo
2225
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2226
               "ghost instance '%s' in temporary DRBD map", instance)
2227
        # ghost instance should not be running, but otherwise we
2228
        # don't give double warnings (both ghost instance and
2229
        # unallocated minor in use)
2230
      if test:
2231
        node_drbd[minor] = (instance, False)
2232
      else:
2233
        instance = instanceinfo[instance]
2234
        node_drbd[minor] = (instance.name, instance.admin_up)
2235

    
2236
    # and now check them
2237
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2238
    test = not isinstance(used_minors, (tuple, list))
2239
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
2240
             "cannot parse drbd status file: %s", str(used_minors))
2241
    if test:
2242
      # we cannot check drbd status
2243
      return
2244

    
2245
    for minor, (iname, must_exist) in node_drbd.items():
2246
      test = minor not in used_minors and must_exist
2247
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2248
               "drbd minor %d of instance %s is not active", minor, iname)
2249
    for minor in used_minors:
2250
      test = minor not in node_drbd
2251
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2252
               "unallocated drbd minor %d is in use", minor)
2253

    
2254
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2255
    """Builds the node OS structures.
2256

2257
    @type ninfo: L{objects.Node}
2258
    @param ninfo: the node to check
2259
    @param nresult: the remote results for the node
2260
    @param nimg: the node image object
2261

2262
    """
2263
    node = ninfo.name
2264
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2265

    
2266
    remote_os = nresult.get(constants.NV_OSLIST, None)
2267
    test = (not isinstance(remote_os, list) or
2268
            not compat.all(isinstance(v, list) and len(v) == 7
2269
                           for v in remote_os))
2270

    
2271
    _ErrorIf(test, constants.CV_ENODEOS, node,
2272
             "node hasn't returned valid OS data")
2273

    
2274
    nimg.os_fail = test
2275

    
2276
    if test:
2277
      return
2278

    
2279
    os_dict = {}
2280

    
2281
    for (name, os_path, status, diagnose,
2282
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2283

    
2284
      if name not in os_dict:
2285
        os_dict[name] = []
2286

    
2287
      # parameters is a list of lists instead of list of tuples due to
2288
      # JSON lacking a real tuple type, fix it:
2289
      parameters = [tuple(v) for v in parameters]
2290
      os_dict[name].append((os_path, status, diagnose,
2291
                            set(variants), set(parameters), set(api_ver)))
2292

    
2293
    nimg.oslist = os_dict
2294

    
2295
  def _VerifyNodeOS(self, ninfo, nimg, base):
2296
    """Verifies the node OS list.
2297

2298
    @type ninfo: L{objects.Node}
2299
    @param ninfo: the node to check
2300
    @param nimg: the node image object
2301
    @param base: the 'template' node we match against (e.g. from the master)
2302

2303
    """
2304
    node = ninfo.name
2305
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2306

    
2307
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2308

    
2309
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2310
    for os_name, os_data in nimg.oslist.items():
2311
      assert os_data, "Empty OS status for OS %s?!" % os_name
2312
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2313
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2314
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2315
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2316
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2317
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2318
      # comparisons with the 'base' image
2319
      test = os_name not in base.oslist
2320
      _ErrorIf(test, constants.CV_ENODEOS, node,
2321
               "Extra OS %s not present on reference node (%s)",
2322
               os_name, base.name)
2323
      if test:
2324
        continue
2325
      assert base.oslist[os_name], "Base node has empty OS status?"
2326
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2327
      if not b_status:
2328
        # base OS is invalid, skipping
2329
        continue
2330
      for kind, a, b in [("API version", f_api, b_api),
2331
                         ("variants list", f_var, b_var),
2332
                         ("parameters", beautify_params(f_param),
2333
                          beautify_params(b_param))]:
2334
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
2335
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2336
                 kind, os_name, base.name,
2337
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2338

    
2339
    # check any missing OSes
2340
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2341
    _ErrorIf(missing, constants.CV_ENODEOS, node,
2342
             "OSes present on reference node %s but missing on this node: %s",
2343
             base.name, utils.CommaJoin(missing))
2344

    
2345
  def _VerifyOob(self, ninfo, nresult):
2346
    """Verifies out of band functionality of a node.
2347

2348
    @type ninfo: L{objects.Node}
2349
    @param ninfo: the node to check
2350
    @param nresult: the remote results for the node
2351

2352
    """
2353
    node = ninfo.name
2354
    # We just have to verify the paths on master and/or master candidates
2355
    # as the oob helper is invoked on the master
2356
    if ((ninfo.master_candidate or ninfo.master_capable) and
2357
        constants.NV_OOB_PATHS in nresult):
2358
      for path_result in nresult[constants.NV_OOB_PATHS]:
2359
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2360

    
2361
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2362
    """Verifies and updates the node volume data.
2363

2364
    This function will update a L{NodeImage}'s internal structures
2365
    with data from the remote call.
2366

2367
    @type ninfo: L{objects.Node}
2368
    @param ninfo: the node to check
2369
    @param nresult: the remote results for the node
2370
    @param nimg: the node image object
2371
    @param vg_name: the configured VG name
2372

2373
    """
2374
    node = ninfo.name
2375
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2376

    
2377
    nimg.lvm_fail = True
2378
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2379
    if vg_name is None:
2380
      pass
2381
    elif isinstance(lvdata, basestring):
2382
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2383
               utils.SafeEncode(lvdata))
2384
    elif not isinstance(lvdata, dict):
2385
      _ErrorIf(True, constants.CV_ENODELVM, node,
2386
               "rpc call to node failed (lvlist)")
2387
    else:
2388
      nimg.volumes = lvdata
2389
      nimg.lvm_fail = False
2390

    
2391
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2392
    """Verifies and updates the node instance list.
2393

2394
    If the listing was successful, then updates this node's instance
2395
    list. Otherwise, it marks the RPC call as failed for the instance
2396
    list key.
2397

2398
    @type ninfo: L{objects.Node}
2399
    @param ninfo: the node to check
2400
    @param nresult: the remote results for the node
2401
    @param nimg: the node image object
2402

2403
    """
2404
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2405
    test = not isinstance(idata, list)
2406
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2407
                  "rpc call to node failed (instancelist): %s",
2408
                  utils.SafeEncode(str(idata)))
2409
    if test:
2410
      nimg.hyp_fail = True
2411
    else:
2412
      nimg.instances = idata
2413

    
2414
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2415
    """Verifies and computes a node information map
2416

2417
    @type ninfo: L{objects.Node}
2418
    @param ninfo: the node to check
2419
    @param nresult: the remote results for the node
2420
    @param nimg: the node image object
2421
    @param vg_name: the configured VG name
2422

2423
    """
2424
    node = ninfo.name
2425
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2426

    
2427
    # try to read free memory (from the hypervisor)
2428
    hv_info = nresult.get(constants.NV_HVINFO, None)
2429
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2430
    _ErrorIf(test, constants.CV_ENODEHV, node,
2431
             "rpc call to node failed (hvinfo)")
2432
    if not test:
2433
      try:
2434
        nimg.mfree = int(hv_info["memory_free"])
2435
      except (ValueError, TypeError):
2436
        _ErrorIf(True, constants.CV_ENODERPC, node,
2437
                 "node returned invalid nodeinfo, check hypervisor")
2438

    
2439
    # FIXME: devise a free space model for file based instances as well
2440
    if vg_name is not None:
2441
      test = (constants.NV_VGLIST not in nresult or
2442
              vg_name not in nresult[constants.NV_VGLIST])
2443
      _ErrorIf(test, constants.CV_ENODELVM, node,
2444
               "node didn't return data for the volume group '%s'"
2445
               " - it is either missing or broken", vg_name)
2446
      if not test:
2447
        try:
2448
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2449
        except (ValueError, TypeError):
2450
          _ErrorIf(True, constants.CV_ENODERPC, node,
2451
                   "node returned invalid LVM info, check LVM status")
2452

    
2453
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2454
    """Gets per-disk status information for all instances.
2455

2456
    @type nodelist: list of strings
2457
    @param nodelist: Node names
2458
    @type node_image: dict of (name, L{objects.Node})
2459
    @param node_image: Node objects
2460
    @type instanceinfo: dict of (name, L{objects.Instance})
2461
    @param instanceinfo: Instance objects
2462
    @rtype: {instance: {node: [(success, payload)]}}
2463
    @return: a dictionary of per-instance dictionaries with nodes as
2464
        keys and disk information as values; the disk information is a
2465
        list of tuples (success, payload)
2466

2467
    """
2468
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2469

    
2470
    node_disks = {}
2471
    node_disks_devonly = {}
2472
    diskless_instances = set()
2473
    diskless = constants.DT_DISKLESS
2474

    
2475
    for nname in nodelist:
2476
      node_instances = list(itertools.chain(node_image[nname].pinst,
2477
                                            node_image[nname].sinst))
2478
      diskless_instances.update(inst for inst in node_instances
2479
                                if instanceinfo[inst].disk_template == diskless)
2480
      disks = [(inst, disk)
2481
               for inst in node_instances
2482
               for disk in instanceinfo[inst].disks]
2483

    
2484
      if not disks:
2485
        # No need to collect data
2486
        continue
2487

    
2488
      node_disks[nname] = disks
2489

    
2490
      # Creating copies as SetDiskID below will modify the objects and that can
2491
      # lead to incorrect data returned from nodes
2492
      devonly = [dev.Copy() for (_, dev) in disks]
2493

    
2494
      for dev in devonly:
2495
        self.cfg.SetDiskID(dev, nname)
2496

    
2497
      node_disks_devonly[nname] = devonly
2498

    
2499
    assert len(node_disks) == len(node_disks_devonly)
2500

    
2501
    # Collect data from all nodes with disks
2502
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2503
                                                          node_disks_devonly)
2504

    
2505
    assert len(result) == len(node_disks)
2506

    
2507
    instdisk = {}
2508

    
2509
    for (nname, nres) in result.items():
2510
      disks = node_disks[nname]
2511

    
2512
      if nres.offline:
2513
        # No data from this node
2514
        data = len(disks) * [(False, "node offline")]
2515
      else:
2516
        msg = nres.fail_msg
2517
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
2518
                 "while getting disk information: %s", msg)
2519
        if msg:
2520
          # No data from this node
2521
          data = len(disks) * [(False, msg)]
2522
        else:
2523
          data = []
2524
          for idx, i in enumerate(nres.payload):
2525
            if isinstance(i, (tuple, list)) and len(i) == 2:
2526
              data.append(i)
2527
            else:
2528
              logging.warning("Invalid result from node %s, entry %d: %s",
2529
                              nname, idx, i)
2530
              data.append((False, "Invalid result from the remote node"))
2531

    
2532
      for ((inst, _), status) in zip(disks, data):
2533
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2534

    
2535
    # Add empty entries for diskless instances.
2536
    for inst in diskless_instances:
2537
      assert inst not in instdisk
2538
      instdisk[inst] = {}
2539

    
2540
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2541
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2542
                      compat.all(isinstance(s, (tuple, list)) and
2543
                                 len(s) == 2 for s in statuses)
2544
                      for inst, nnames in instdisk.items()
2545
                      for nname, statuses in nnames.items())
2546
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2547

    
2548
    return instdisk
2549

    
2550
  @staticmethod
2551
  def _SshNodeSelector(group_uuid, all_nodes):
2552
    """Create endless iterators for all potential SSH check hosts.
2553

2554
    """
2555
    nodes = [node for node in all_nodes
2556
             if (node.group != group_uuid and
2557
                 not node.offline)]
2558
    keyfunc = operator.attrgetter("group")
2559

    
2560
    return map(itertools.cycle,
2561
               [sorted(map(operator.attrgetter("name"), names))
2562
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2563
                                                  keyfunc)])
2564

    
2565
  @classmethod
2566
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2567
    """Choose which nodes should talk to which other nodes.
2568

2569
    We will make nodes contact all nodes in their group, and one node from
2570
    every other group.
2571

2572
    @warning: This algorithm has a known issue if one node group is much
2573
      smaller than others (e.g. just one node). In such a case all other
2574
      nodes will talk to the single node.
2575

2576
    """
2577
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2578
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2579

    
2580
    return (online_nodes,
2581
            dict((name, sorted([i.next() for i in sel]))
2582
                 for name in online_nodes))
2583

    
2584
  def BuildHooksEnv(self):
2585
    """Build hooks env.
2586

2587
    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.
2589

2590
    """
2591
    env = {
2592
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2593
      }
2594

    
2595
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2596
               for node in self.my_node_info.values())
2597

    
2598
    return env
2599

    
2600
  def BuildHooksNodes(self):
2601
    """Build hooks nodes.
2602

2603
    """
2604
    return ([], self.my_node_names)
2605

    
2606
  def Exec(self, feedback_fn):
2607
    """Verify integrity of the node group, performing various test on nodes.
2608

2609
    """
2610
    # This method has too many local variables. pylint: disable=R0914
2611
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2612

    
2613
    if not self.my_node_names:
2614
      # empty node group
2615
      feedback_fn("* Empty node group, skipping verification")
2616
      return True
2617

    
2618
    self.bad = False
2619
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2620
    verbose = self.op.verbose
2621
    self._feedback_fn = feedback_fn
2622

    
2623
    vg_name = self.cfg.GetVGName()
2624
    drbd_helper = self.cfg.GetDRBDHelper()
2625
    cluster = self.cfg.GetClusterInfo()
2626
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2627
    hypervisors = cluster.enabled_hypervisors
2628
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2629

    
2630
    i_non_redundant = [] # Non redundant instances
2631
    i_non_a_balanced = [] # Non auto-balanced instances
2632
    n_offline = 0 # Count of offline nodes
2633
    n_drained = 0 # Count of nodes being drained
2634
    node_vol_should = {}
2635

    
2636
    # FIXME: verify OS list
2637

    
2638
    # File verification
2639
    filemap = _ComputeAncillaryFiles(cluster, False)
2640

    
2641
    # do local checksums
2642
    master_node = self.master_node = self.cfg.GetMasterNode()
2643
    master_ip = self.cfg.GetMasterIP()
2644

    
2645
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2646

    
2647
    node_verify_param = {
2648
      constants.NV_FILELIST:
2649
        utils.UniqueSequence(filename
2650
                             for files in filemap
2651
                             for filename in files),
2652
      constants.NV_NODELIST:
2653
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2654
                                  self.all_node_info.values()),
2655
      constants.NV_HYPERVISOR: hypervisors,
2656
      constants.NV_HVPARAMS:
2657
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2658
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2659
                                 for node in node_data_list
2660
                                 if not node.offline],
2661
      constants.NV_INSTANCELIST: hypervisors,
2662
      constants.NV_VERSION: None,
2663
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2664
      constants.NV_NODESETUP: None,
2665
      constants.NV_TIME: None,
2666
      constants.NV_MASTERIP: (master_node, master_ip),
2667
      constants.NV_OSLIST: None,
2668
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2669
      }
2670

    
2671
    if vg_name is not None:
2672
      node_verify_param[constants.NV_VGLIST] = None
2673
      node_verify_param[constants.NV_LVLIST] = vg_name
2674
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2675
      node_verify_param[constants.NV_DRBDLIST] = None
2676

    
2677
    if drbd_helper:
2678
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2679

    
2680
    # bridge checks
2681
    # FIXME: this needs to be changed per node-group, not cluster-wide
2682
    bridges = set()
2683
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2684
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2685
      bridges.add(default_nicpp[constants.NIC_LINK])
2686
    for instance in self.my_inst_info.values():
2687
      for nic in instance.nics:
2688
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2689
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2690
          bridges.add(full_nic[constants.NIC_LINK])
2691

    
2692
    if bridges:
2693
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2694

    
2695
    # Build our expected cluster state
2696
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2697
                                                 name=node.name,
2698
                                                 vm_capable=node.vm_capable))
2699
                      for node in node_data_list)
2700

    
2701
    # Gather OOB paths
2702
    oob_paths = []
2703
    for node in self.all_node_info.values():
2704
      path = _SupportsOob(self.cfg, node)
2705
      if path and path not in oob_paths:
2706
        oob_paths.append(path)
2707

    
2708
    if oob_paths:
2709
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2710

    
2711
    for instance in self.my_inst_names:
2712
      inst_config = self.my_inst_info[instance]
2713

    
2714
      for nname in inst_config.all_nodes:
2715
        if nname not in node_image:
2716
          gnode = self.NodeImage(name=nname)
2717
          gnode.ghost = (nname not in self.all_node_info)
2718
          node_image[nname] = gnode
2719

    
2720
      inst_config.MapLVsByNode(node_vol_should)
2721

    
2722
      pnode = inst_config.primary_node
2723
      node_image[pnode].pinst.append(instance)
2724

    
2725
      for snode in inst_config.secondary_nodes:
2726
        nimg = node_image[snode]
2727
        nimg.sinst.append(instance)
2728
        if pnode not in nimg.sbp:
2729
          nimg.sbp[pnode] = []
2730
        nimg.sbp[pnode].append(instance)
2731

    
2732
    # At this point, we have the in-memory data structures complete,
2733
    # except for the runtime information, which we'll gather next
2734

    
2735
    # Due to the way our RPC system works, exact response times cannot be
2736
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2737
    # time before and after executing the request, we can at least have a time
2738
    # window.
2739
    nvinfo_starttime = time.time()
2740
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2741
                                           node_verify_param,
2742
                                           self.cfg.GetClusterName())
2743
    nvinfo_endtime = time.time()
2744

    
2745
    if self.extra_lv_nodes and vg_name is not None:
2746
      extra_lv_nvinfo = \
2747
          self.rpc.call_node_verify(self.extra_lv_nodes,
2748
                                    {constants.NV_LVLIST: vg_name},
2749
                                    self.cfg.GetClusterName())
2750
    else:
2751
      extra_lv_nvinfo = {}
2752

    
2753
    all_drbd_map = self.cfg.ComputeDRBDMap()
2754

    
2755
    feedback_fn("* Gathering disk information (%s nodes)" %
2756
                len(self.my_node_names))
2757
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2758
                                     self.my_inst_info)
2759

    
2760
    feedback_fn("* Verifying configuration file consistency")
2761

    
2762
    # If not all nodes are being checked, we need to make sure the master node
2763
    # and a non-checked vm_capable node are in the list.
2764
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2765
    if absent_nodes:
2766
      vf_nvinfo = all_nvinfo.copy()
2767
      vf_node_info = list(self.my_node_info.values())
2768
      additional_nodes = []
2769
      if master_node not in self.my_node_info:
2770
        additional_nodes.append(master_node)
2771
        vf_node_info.append(self.all_node_info[master_node])
2772
      # Add the first vm_capable node we find which is not included
2773
      for node in absent_nodes:
2774
        nodeinfo = self.all_node_info[node]
2775
        if nodeinfo.vm_capable and not nodeinfo.offline:
2776
          additional_nodes.append(node)
2777
          vf_node_info.append(self.all_node_info[node])
2778
          break
2779
      key = constants.NV_FILELIST
2780
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2781
                                                 {key: node_verify_param[key]},
2782
                                                 self.cfg.GetClusterName()))
2783
    else:
2784
      vf_nvinfo = all_nvinfo
2785
      vf_node_info = self.my_node_info.values()
2786

    
2787
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2788

    
2789
    feedback_fn("* Verifying node status")
2790

    
2791
    refos_img = None
2792

    
2793
    for node_i in node_data_list:
2794
      node = node_i.name
2795
      nimg = node_image[node]
2796

    
2797
      if node_i.offline:
2798
        if verbose:
2799
          feedback_fn("* Skipping offline node %s" % (node,))
2800
        n_offline += 1
2801
        continue
2802

    
2803
      if node == master_node:
2804
        ntype = "master"
2805
      elif node_i.master_candidate:
2806
        ntype = "master candidate"
2807
      elif node_i.drained:
2808
        ntype = "drained"
2809
        n_drained += 1
2810
      else:
2811
        ntype = "regular"
2812
      if verbose:
2813
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2814

    
2815
      msg = all_nvinfo[node].fail_msg
2816
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2817
               msg)
2818
      if msg:
2819
        nimg.rpc_fail = True
2820
        continue
2821

    
2822
      nresult = all_nvinfo[node].payload
2823

    
2824
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2825
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2826
      self._VerifyNodeNetwork(node_i, nresult)
2827
      self._VerifyOob(node_i, nresult)
2828

    
2829
      if nimg.vm_capable:
2830
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2831
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2832
                             all_drbd_map)
2833

    
2834
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2835
        self._UpdateNodeInstances(node_i, nresult, nimg)
2836
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2837
        self._UpdateNodeOS(node_i, nresult, nimg)
2838

    
2839
        if not nimg.os_fail:
2840
          if refos_img is None:
2841
            refos_img = nimg
2842
          self._VerifyNodeOS(node_i, nimg, refos_img)
2843
        self._VerifyNodeBridges(node_i, nresult, bridges)
2844

    
2845
        # Check whether all running instances are primary for the node. (This
2846
        # can no longer be done from _VerifyInstance below, since some of the
2847
        # wrong instances could be from other node groups.)
2848
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2849

    
2850
        for inst in non_primary_inst:
2851
          test = inst in self.all_inst_info
2852
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2853
                   "instance should not run on node %s", node_i.name)
2854
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2855
                   "node is running unknown instance %s", inst)
2856

    
2857
    for node, result in extra_lv_nvinfo.items():
2858
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2859
                              node_image[node], vg_name)
2860

    
2861
    feedback_fn("* Verifying instance status")
2862
    for instance in self.my_inst_names:
2863
      if verbose:
2864
        feedback_fn("* Verifying instance %s" % instance)
2865
      inst_config = self.my_inst_info[instance]
2866
      self._VerifyInstance(instance, inst_config, node_image,
2867
                           instdisk[instance])
2868
      inst_nodes_offline = []
2869

    
2870
      pnode = inst_config.primary_node
2871
      pnode_img = node_image[pnode]
2872
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2873
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
2874
               " primary node failed", instance)
2875

    
2876
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2877
               constants.CV_EINSTANCEBADNODE, instance,
2878
               "instance is marked as running and lives on offline node %s",
2879
               inst_config.primary_node)
2880

    
2881
      # If the instance is non-redundant we cannot survive losing its primary
2882
      # node, so we are not N+1 compliant. On the other hand we have no disk
2883
      # templates with more than one secondary so that situation is not well
2884
      # supported either.
2885
      # FIXME: does not support file-backed instances
2886
      if not inst_config.secondary_nodes:
2887
        i_non_redundant.append(instance)
2888

    
2889
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
2890
               constants.CV_EINSTANCELAYOUT,
2891
               instance, "instance has multiple secondary nodes: %s",
2892
               utils.CommaJoin(inst_config.secondary_nodes),
2893
               code=self.ETYPE_WARNING)
2894

    
2895
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2896
        pnode = inst_config.primary_node
2897
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2898
        instance_groups = {}
2899

    
2900
        for node in instance_nodes:
2901
          instance_groups.setdefault(self.all_node_info[node].group,
2902
                                     []).append(node)
2903

    
2904
        pretty_list = [
2905
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2906
          # Sort so that we always list the primary node first.
2907
          for group, nodes in sorted(instance_groups.items(),
2908
                                     key=lambda (_, nodes): pnode in nodes,
2909
                                     reverse=True)]
2910

    
2911
        self._ErrorIf(len(instance_groups) > 1,
2912
                      constants.CV_EINSTANCESPLITGROUPS,
2913
                      instance, "instance has primary and secondary nodes in"
2914
                      " different groups: %s", utils.CommaJoin(pretty_list),
2915
                      code=self.ETYPE_WARNING)
2916

    
2917
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2918
        i_non_a_balanced.append(instance)
2919

    
2920
      for snode in inst_config.secondary_nodes:
2921
        s_img = node_image[snode]
2922
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2923
                 snode, "instance %s, connection to secondary node failed",
2924
                 instance)
2925

    
2926
        if s_img.offline:
2927
          inst_nodes_offline.append(snode)
2928

    
2929
      # warn that the instance lives on offline nodes
2930
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2931
               "instance has offline secondary node(s) %s",
2932
               utils.CommaJoin(inst_nodes_offline))
2933
      # ... or ghost/non-vm_capable nodes
2934
      for node in inst_config.all_nodes:
2935
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2936
                 instance, "instance lives on ghost node %s", node)
2937
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2938
                 instance, "instance lives on non-vm_capable node %s", node)
2939

    
2940
    feedback_fn("* Verifying orphan volumes")
2941
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2942

    
2943
    # We will get spurious "unknown volume" warnings if any node of this group
2944
    # is secondary for an instance whose primary is in another group. To avoid
2945
    # them, we find these instances and add their volumes to node_vol_should.
2946
    for inst in self.all_inst_info.values():
2947
      for secondary in inst.secondary_nodes:
2948
        if (secondary in self.my_node_info
2949
            and inst.name not in self.my_inst_info):
2950
          inst.MapLVsByNode(node_vol_should)
2951
          break
2952

    
2953
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2954

    
2955
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2956
      feedback_fn("* Verifying N+1 Memory redundancy")
2957
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2958

    
2959
    feedback_fn("* Other Notes")
2960
    if i_non_redundant:
2961
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2962
                  % len(i_non_redundant))
2963

    
2964
    if i_non_a_balanced:
2965
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2966
                  % len(i_non_a_balanced))
2967

    
2968
    if n_offline:
2969
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2970

    
2971
    if n_drained:
2972
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2973

    
2974
    return not self.bad
2975

    
2976
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2977
    """Analyze the post-hooks' result
2978

2979
    This method analyses the hook result, handles it, and sends some
2980
    nicely-formatted feedback back to the user.
2981

2982
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2983
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2984
    @param hooks_results: the results of the multi-node hooks rpc call
2985
    @param feedback_fn: function used send feedback back to the caller
2986
    @param lu_result: previous Exec result
2987
    @return: the new Exec result, based on the previous result
2988
        and hook results
2989

2990
    """
2991
    # We only really run POST phase hooks, only for non-empty groups,
2992
    # and are only interested in their results
2993
    if not self.my_node_names:
2994
      # empty node group
2995
      pass
2996
    elif phase == constants.HOOKS_PHASE_POST:
2997
      # Used to change hooks' output to proper indentation
2998
      feedback_fn("* Hooks Results")
2999
      assert hooks_results, "invalid result from hooks"
3000

    
3001
      for node_name in hooks_results:
3002
        res = hooks_results[node_name]
3003
        msg = res.fail_msg
3004
        test = msg and not res.offline
3005
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3006
                      "Communication failure in hooks execution: %s", msg)
3007
        if res.offline or msg:
3008
          # No need to investigate payload if node is offline or gave
3009
          # an error.
3010
          continue
3011
        for script, hkr, output in res.payload:
3012
          test = hkr == constants.HKR_FAIL
3013
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3014
                        "Script %s failed, output:", script)
3015
          if test:
3016
            output = self._HOOKS_INDENT_RE.sub("      ", output)
3017
            feedback_fn("%s" % output)
3018
            lu_result = False
3019

    
3020
    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])
3041

    
3042

    
3043
class LUGroupVerifyDisks(NoHooksLU):
3044
  """Verifies the status of all disks in a node group.
3045

3046
  """
3047
  REQ_BGL = False
3048

    
3049
  def ExpandNames(self):
3050
    # Raises errors.OpPrereqError on its own if group can't be found
3051
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3052

    
3053
    self.share_locks = _ShareAll()
3054
    self.needed_locks = {
3055
      locking.LEVEL_INSTANCE: [],
3056
      locking.LEVEL_NODEGROUP: [],
3057
      locking.LEVEL_NODE: [],
3058
      }
3059

    
3060
  def DeclareLocks(self, level):
3061
    if level == locking.LEVEL_INSTANCE:
3062
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3063

    
3064
      # Lock instances optimistically, needs verification once node and group
3065
      # locks have been acquired
3066
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3067
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3068

    
3069
    elif level == locking.LEVEL_NODEGROUP:
3070
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3071

    
3072
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3073
        set([self.group_uuid] +
3074
            # Lock all groups used by instances optimistically; this requires
3075
            # going via the node before it's locked, requiring verification
3076
            # later on
3077
            [group_uuid
3078
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3079
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3080

    
3081
    elif level == locking.LEVEL_NODE:
3082
      # This will only lock the nodes in the group to be verified which contain
3083
      # actual instances
3084
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3085
      self._LockInstancesNodes()
3086

    
3087
      # Lock all nodes in group to be verified
3088
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3089
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3090
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3091

    
3092
  def CheckPrereq(self):
3093
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3094
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3095
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3096

    
3097
    assert self.group_uuid in owned_groups
3098

    
3099
    # Check if locked instances are still correct
3100
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3101

    
3102
    # Get instance information
3103
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3104

    
3105
    # Check if node groups for locked instances are still correct
3106
    for (instance_name, inst) in self.instances.items():
3107
      assert owned_nodes.issuperset(inst.all_nodes), \
3108
        "Instance %s's nodes changed while we kept the lock" % instance_name
3109

    
3110
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3111
                                             owned_groups)
3112

    
3113
      assert self.group_uuid in inst_groups, \
3114
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3115

    
3116
  def Exec(self, feedback_fn):
3117
    """Verify integrity of cluster disks.
3118

3119
    @rtype: tuple of three items
3120
    @return: a tuple of (dict of node-to-node_error, list of instances
3121
        which need activate-disks, dict of instance: (node, volume) for
3122
        missing volumes
3123

3124
    """
3125
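    # Purely illustrative (hypothetical) example of the shape of the return
    # value documented above:
    #   ({"node1.example.com": "error enumerating LVs"},
    #    ["degraded-instance.example.com"],
    #    {"broken-instance.example.com": [("node2.example.com", "xenvg/disk0")]})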
    res_nodes = {}
3126
    res_instances = set()
3127
    res_missing = {}
3128

    
3129
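    # Build a map from (node name, volume name) to the owning instance,
    # restricted to instances that are marked as running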
    nv_dict = _MapInstanceDisksToNodes([inst
3130
                                        for inst in self.instances.values()
3131
                                        if inst.admin_up])
3132

    
3133
    if nv_dict:
3134
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3135
                             set(self.cfg.GetVmCapableNodeList()))
3136

    
3137
      node_lvs = self.rpc.call_lv_list(nodes, [])
3138

    
3139
      for (node, node_res) in node_lvs.items():
3140
        if node_res.offline:
3141
          continue
3142

    
3143
        msg = node_res.fail_msg
3144
        if msg:
3145
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3146
          res_nodes[node] = msg
3147
          continue
3148

    
3149
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3150
          inst = nv_dict.pop((node, lv_name), None)
3151
          if not (lv_online or inst is None):
3152
            res_instances.add(inst)
3153

    
3154
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3155
      # better
3156
      for key, inst in nv_dict.iteritems():
3157
        res_missing.setdefault(inst, []).append(key)
3158

    
3159
    return (res_nodes, list(res_instances), res_missing)
3160

    
3161

    
3162
class LUClusterRepairDiskSizes(NoHooksLU):
3163
  """Verifies the cluster disks sizes.
3164

3165
  """
3166
  REQ_BGL = False
3167

    
3168
  def ExpandNames(self):
3169
    if self.op.instances:
3170
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3171
      self.needed_locks = {
3172
        locking.LEVEL_NODE: [],
3173
        locking.LEVEL_INSTANCE: self.wanted_names,
3174
        }
3175
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3176
    else:
3177
      self.wanted_names = None
3178
      self.needed_locks = {
3179
        locking.LEVEL_NODE: locking.ALL_SET,
3180
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3181
        }
3182
    self.share_locks = _ShareAll()
3183

    
3184
  def DeclareLocks(self, level):
3185
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3186
      self._LockInstancesNodes(primary_only=True)
3187

    
3188
  def CheckPrereq(self):
3189
    """Check prerequisites.
3190

3191
    This only checks the optional instance list against the existing names.
3192

3193
    """
3194
    if self.wanted_names is None:
3195
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3196

    
3197
    self.wanted_instances = \
3198
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3199

    
3200
  def _EnsureChildSizes(self, disk):
3201
    """Ensure children of the disk have the needed disk size.
3202

3203
    This is valid mainly for DRBD8 and fixes an issue where the
3204
    children have smaller disk size.
3205

3206
    @param disk: an L{ganeti.objects.Disk} object
3207

3208
    """
3209
    if disk.dev_type == constants.LD_DRBD8:
3210
      assert disk.children, "Empty children for DRBD8?"
3211
      fchild = disk.children[0]
3212
      mismatch = fchild.size < disk.size
3213
      if mismatch:
3214
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3215
                     fchild.size, disk.size)
3216
        fchild.size = disk.size
3217

    
3218
      # and we recurse on this child only, not on the metadev
3219
      return self._EnsureChildSizes(fchild) or mismatch
3220
    else:
3221
      return False
3222

    
3223
  def Exec(self, feedback_fn):
3224
    """Verify the size of cluster disks.
3225

3226
    """
3227
    # TODO: check child disks too
3228
    # TODO: check differences in size between primary/secondary nodes
3229
    per_node_disks = {}
3230
    for instance in self.wanted_instances:
3231
      pnode = instance.primary_node
3232
      if pnode not in per_node_disks:
3233
        per_node_disks[pnode] = []
3234
      for idx, disk in enumerate(instance.disks):
3235
        per_node_disks[pnode].append((instance, idx, disk))
3236

    
3237
    changed = []
3238
    for node, dskl in per_node_disks.items():
3239
      newl = [v[2].Copy() for v in dskl]
3240
      for dsk in newl:
3241
        self.cfg.SetDiskID(dsk, node)
3242
      result = self.rpc.call_blockdev_getsize(node, newl)
3243
      if result.fail_msg:
3244
        self.LogWarning("Failure in blockdev_getsize call to node"
3245
                        " %s, ignoring", node)
3246
        continue
3247
      if len(result.payload) != len(dskl):
3248
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3249
                        " result.payload=%s", node, len(dskl), result.payload)
3250
        self.LogWarning("Invalid result from node %s, ignoring node results",
3251
                        node)
3252
        continue
3253
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3254
        if size is None:
3255
          self.LogWarning("Disk %d of instance %s did not return size"
3256
                          " information, ignoring", idx, instance.name)
3257
          continue
3258
        if not isinstance(size, (int, long)):
3259
          self.LogWarning("Disk %d of instance %s did not return valid"
3260
                          " size information, ignoring", idx, instance.name)
3261
          continue
3262
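        # the payload reports the size in bytes; convert to MiB, the unit in
        # which disk sizes are stored in the configuration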
        size = size >> 20
3263
        if size != disk.size:
3264
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3265
                       " correcting: recorded %d, actual %d", idx,
3266
                       instance.name, disk.size, size)
3267
          disk.size = size
3268
          self.cfg.Update(instance, feedback_fn)
3269
          changed.append((instance.name, idx, size))
3270
        if self._EnsureChildSizes(disk):
3271
          self.cfg.Update(instance, feedback_fn)
3272
          changed.append((instance.name, idx, disk.size))
3273
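    # Illustrative (hypothetical) example of the return value: a list of
    # (instance name, disk index, new size in MiB) tuples, e.g.
    #   [("instance1.example.com", 0, 10240)]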
    return changed
3274

    
3275

    
3276
class LUClusterRename(LogicalUnit):
3277
  """Rename the cluster.
3278

3279
  """
3280
  HPATH = "cluster-rename"
3281
  HTYPE = constants.HTYPE_CLUSTER
3282

    
3283
  def BuildHooksEnv(self):
3284
    """Build hooks env.
3285

3286
    """
3287
    return {
3288
      "OP_TARGET": self.cfg.GetClusterName(),
3289
      "NEW_NAME": self.op.name,
3290
      }
3291

    
3292
  def BuildHooksNodes(self):
3293
    """Build hooks nodes.
3294

3295
    """
3296
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3297

    
3298
  def CheckPrereq(self):
3299
    """Verify that the passed name is a valid one.
3300

3301
    """
3302
    hostname = netutils.GetHostname(name=self.op.name,
3303
                                    family=self.cfg.GetPrimaryIPFamily())
3304

    
3305
    new_name = hostname.name
3306
    self.ip = new_ip = hostname.ip
3307
    old_name = self.cfg.GetClusterName()
3308
    old_ip = self.cfg.GetMasterIP()
3309
    if new_name == old_name and new_ip == old_ip:
3310
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3311
                                 " cluster has changed",
3312
                                 errors.ECODE_INVAL)
3313
    if new_ip != old_ip:
3314
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3315
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3316
                                   " reachable on the network" %
3317
                                   new_ip, errors.ECODE_NOTUNIQUE)
3318

    
3319
    self.op.name = new_name
3320

    
3321
  def Exec(self, feedback_fn):
3322
    """Rename the cluster.
3323

3324
    """
3325
    clustername = self.op.name
3326
    ip = self.ip
3327

    
3328
    # shutdown the master IP
3329
    master = self.cfg.GetMasterNode()
3330
    result = self.rpc.call_node_deactivate_master_ip(master)
3331
    result.Raise("Could not disable the master role")
3332

    
3333
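    # whatever happens from here on, the "finally" clause below re-activates
    # the master IP, so the cluster is not left without it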
    try:
3334
      cluster = self.cfg.GetClusterInfo()
3335
      cluster.cluster_name = clustername
3336
      cluster.master_ip = ip
3337
      self.cfg.Update(cluster, feedback_fn)
3338

    
3339
      # update the known hosts file
3340
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3341
      node_list = self.cfg.GetOnlineNodeList()
3342
      try:
3343
        node_list.remove(master)
3344
      except ValueError:
3345
        pass
3346
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3347
    finally:
3348
      result = self.rpc.call_node_activate_master_ip(master)
3349
      msg = result.fail_msg
3350
      if msg:
3351
        self.LogWarning("Could not re-enable the master role on"
3352
                        " the master, please restart manually: %s", msg)
3353

    
3354
    return clustername
3355

    
3356

    
3357
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                                (netmask))
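# Purely illustrative (hypothetical) use of the helper above: on an IPv4
# cluster, _ValidateNetmask(cfg, 24) passes silently, while
# _ValidateNetmask(cfg, 33) raises errors.OpPrereqError.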
3376

    
3377

    
3378
class LUClusterSetParams(LogicalUnit):
3379
  """Change the parameters of the cluster.
3380

3381
  """
3382
  HPATH = "cluster-modify"
3383
  HTYPE = constants.HTYPE_CLUSTER
3384
  REQ_BGL = False
3385

    
3386
  def CheckArguments(self):
3387
    """Check parameters
3388

3389
    """
3390
    if self.op.uid_pool:
3391
      uidpool.CheckUidPool(self.op.uid_pool)
3392

    
3393
    if self.op.add_uids:
3394
      uidpool.CheckUidPool(self.op.add_uids)
3395

    
3396
    if self.op.remove_uids:
3397
      uidpool.CheckUidPool(self.op.remove_uids)
3398

    
3399
    if self.op.master_netmask is not None:
3400
      _ValidateNetmask(self.cfg, self.op.master_netmask)
3401

    
3402
  def ExpandNames(self):
3403
    # FIXME: in the future maybe other cluster params won't require checking on
3404
    # all nodes to be modified.
3405
    self.needed_locks = {
3406
      locking.LEVEL_NODE: locking.ALL_SET,
3407
    }
3408
    self.share_locks[locking.LEVEL_NODE] = 1
3409

    
3410
  def BuildHooksEnv(self):
3411
    """Build hooks env.
3412

3413
    """
3414
    return {
3415
      "OP_TARGET": self.cfg.GetClusterName(),
3416
      "NEW_VG_NAME": self.op.vg_name,
3417
      }
3418

    
3419
  def BuildHooksNodes(self):
3420
    """Build hooks nodes.
3421

3422
    """
3423
    mn = self.cfg.GetMasterNode()
3424
    return ([mn], [mn])
3425

    
3426
  def CheckPrereq(self):
3427
    """Check prerequisites.
3428

3429
    This checks whether the given params don't conflict and
3430
    if the given volume group is valid.
3431

3432
    """
3433
    if self.op.vg_name is not None and not self.op.vg_name:
3434
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3435
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3436
                                   " instances exist", errors.ECODE_INVAL)
3437

    
3438
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3439
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3440
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3441
                                   " drbd-based instances exist",
3442
                                   errors.ECODE_INVAL)
3443

    
3444
    node_list = self.owned_locks(locking.LEVEL_NODE)
3445

    
3446
    # if vg_name not None, checks given volume group on all nodes
3447
    if self.op.vg_name:
3448
      vglist = self.rpc.call_vg_list(node_list)
3449
      for node in node_list:
3450
        msg = vglist[node].fail_msg
3451
        if msg:
3452
          # ignoring down node
3453
          self.LogWarning("Error while gathering data on node %s"
3454
                          " (ignoring node): %s", node, msg)
3455
          continue
3456
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3457
                                              self.op.vg_name,
3458
                                              constants.MIN_VG_SIZE)
3459
        if vgstatus:
3460
          raise errors.OpPrereqError("Error on node '%s': %s" %
3461
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3462

    
3463
    if self.op.drbd_helper:
3464
      # checks given drbd helper on all nodes
3465
      helpers = self.rpc.call_drbd_helper(node_list)
3466
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3467
        if ninfo.offline:
3468
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3469
          continue
3470
        msg = helpers[node].fail_msg
3471
        if msg:
3472
          raise errors.OpPrereqError("Error checking drbd helper on node"
3473
                                     " '%s': %s" % (node, msg),
3474
                                     errors.ECODE_ENVIRON)
3475
        node_helper = helpers[node].payload
3476
        if node_helper != self.op.drbd_helper:
3477
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3478
                                     (node, node_helper), errors.ECODE_ENVIRON)
3479

    
3480
    self.cluster = cluster = self.cfg.GetClusterInfo()
3481
    # validate params changes
3482
    if self.op.beparams:
3483
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3484
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3485

    
3486
    if self.op.ndparams:
3487
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3488
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3489

    
3490
      # TODO: we need a more general way to handle resetting
3491
      # cluster-level parameters to default values
3492
      if self.new_ndparams["oob_program"] == "":
3493
        self.new_ndparams["oob_program"] = \
3494
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3495

    
3496
    if self.op.nicparams:
3497
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3498
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3499
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3500
      nic_errors = []
3501

    
3502
      # check all instances for consistency
3503
      for instance in self.cfg.GetAllInstancesInfo().values():
3504
        for nic_idx, nic in enumerate(instance.nics):
3505
          params_copy = copy.deepcopy(nic.nicparams)
3506
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3507

    
3508
          # check parameter syntax
3509
          try:
3510
            objects.NIC.CheckParameterSyntax(params_filled)
3511
          except errors.ConfigurationError, err:
3512
            nic_errors.append("Instance %s, nic/%d: %s" %
3513
                              (instance.name, nic_idx, err))
3514

    
3515
          # if we're moving instances to routed, check that they have an ip
3516
          target_mode = params_filled[constants.NIC_MODE]
3517
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3518
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3519
                              " address" % (instance.name, nic_idx))
3520
      if nic_errors:
3521
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3522
                                   "\n".join(nic_errors))
3523

    
3524
    # hypervisor list/parameters
3525
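    # start from a copy of the current cluster-wide hypervisor parameters and
    # merge in any per-hypervisor overrides supplied with the opcode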
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3526
    if self.op.hvparams:
3527
      for hv_name, hv_dict in self.op.hvparams.items():
3528
        if hv_name not in self.new_hvparams:
3529
          self.new_hvparams[hv_name] = hv_dict
3530
        else:
3531
          self.new_hvparams[hv_name].update(hv_dict)
3532

    
3533
    # os hypervisor parameters
3534
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3535
    if self.op.os_hvp:
3536
      for os_name, hvs in self.op.os_hvp.items():
3537
        if os_name not in self.new_os_hvp:
3538
          self.new_os_hvp[os_name] = hvs
3539
        else:
3540
          for hv_name, hv_dict in hvs.items():
3541
            if hv_name not in self.new_os_hvp[os_name]:
3542
              self.new_os_hvp[os_name][hv_name] = hv_dict
3543
            else:
3544
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3545

    
3546
    # os parameters
3547
    self.new_osp = objects.FillDict(cluster.osparams, {})
3548
    if self.op.osparams:
3549
      for os_name, osp in self.op.osparams.items():
3550
        if os_name not in self.new_osp:
3551
          self.new_osp[os_name] = {}
3552

    
3553
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3554
                                                  use_none=True)
3555

    
3556
        if not self.new_osp[os_name]:
3557
          # we removed all parameters
3558
          del self.new_osp[os_name]
3559
        else:
3560
          # check the parameter validity (remote check)
3561
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3562
                         os_name, self.new_osp[os_name])
3563

    
3564
    # changes to the hypervisor list
3565
    if self.op.enabled_hypervisors is not None:
3566
      self.hv_list = self.op.enabled_hypervisors
3567
      for hv in self.hv_list:
3568
        # if the hypervisor doesn't already exist in the cluster
3569
        # hvparams, we initialize it to empty, and then (in both
3570
        # cases) we make sure to fill the defaults, as we might not
3571
        # have a complete defaults list if the hypervisor wasn't
3572
        # enabled before
3573
        if hv not in new_hvp:
3574
          new_hvp[hv] = {}
3575
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3576
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3577
    else:
3578
      self.hv_list = cluster.enabled_hypervisors
3579

    
3580
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3581
      # either the enabled list has changed, or the parameters have, validate
3582
      for hv_name, hv_params in self.new_hvparams.items():
3583
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3584
            (self.op.enabled_hypervisors and
3585
             hv_name in self.op.enabled_hypervisors)):
3586
          # either this is a new hypervisor, or its parameters have changed
3587
          hv_class = hypervisor.GetHypervisor(hv_name)
3588
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3589
          hv_class.CheckParameterSyntax(hv_params)
3590
          _CheckHVParams(self, node_list, hv_name, hv_params)
3591

    
3592
    if self.op.os_hvp:
3593
      # no need to check any newly-enabled hypervisors, since the
3594
      # defaults have already been checked in the above code-block
3595
      for os_name, os_hvp in self.new_os_hvp.items():
3596
        for hv_name, hv_params in os_hvp.items():
3597
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3598
          # we need to fill in the new os_hvp on top of the actual hv_p
3599
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3600
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3601
          hv_class = hypervisor.GetHypervisor(hv_name)
3602
          hv_class.CheckParameterSyntax(new_osp)
3603
          _CheckHVParams(self, node_list, hv_name, new_osp)
3604

    
3605
    if self.op.default_iallocator:
3606
      alloc_script = utils.FindFile(self.op.default_iallocator,
3607
                                    constants.IALLOCATOR_SEARCH_PATH,
3608
                                    os.path.isfile)
3609
      if alloc_script is None:
3610
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3611
                                   " specified" % self.op.default_iallocator,
3612
                                   errors.ECODE_INVAL)
3613

    
3614
  def Exec(self, feedback_fn):
3615
    """Change the parameters of the cluster.
3616

3617
    """
3618
    if self.op.vg_name is not None:
3619
      new_volume = self.op.vg_name
3620
      if not new_volume:
3621
        new_volume = None
3622
      if new_volume != self.cfg.GetVGName():
3623
        self.cfg.SetVGName(new_volume)
3624
      else:
3625
        feedback_fn("Cluster LVM configuration already in desired"
3626
                    " state, not changing")
3627
    if self.op.drbd_helper is not None:
3628
      new_helper = self.op.drbd_helper
3629
      if not new_helper:
3630
        new_helper = None
3631
      if new_helper != self.cfg.GetDRBDHelper():
3632
        self.cfg.SetDRBDHelper(new_helper)
3633
      else:
3634
        feedback_fn("Cluster DRBD helper already in desired state,"
3635
                    " not changing")
3636
    if self.op.hvparams:
3637
      self.cluster.hvparams = self.new_hvparams
3638
    if self.op.os_hvp:
3639
      self.cluster.os_hvp = self.new_os_hvp
3640
    if self.op.enabled_hypervisors is not None:
3641
      self.cluster.hvparams = self.new_hvparams
3642
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3643
    if self.op.beparams:
3644
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3645
    if self.op.nicparams:
3646
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3647
    if self.op.osparams:
3648
      self.cluster.osparams = self.new_osp
3649
    if self.op.ndparams:
3650
      self.cluster.ndparams = self.new_ndparams
3651

    
3652
    if self.op.candidate_pool_size is not None:
3653
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3654
      # we need to update the pool size here, otherwise the save will fail
3655
      _AdjustCandidatePool(self, [])
3656

    
3657
    if self.op.maintain_node_health is not None:
3658
      self.cluster.maintain_node_health = self.op.maintain_node_health
3659

    
3660
    if self.op.prealloc_wipe_disks is not None:
3661
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3662

    
3663
    if self.op.add_uids is not None:
3664
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3665

    
3666
    if self.op.remove_uids is not None:
3667
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3668

    
3669
    if self.op.uid_pool is not None:
3670
      self.cluster.uid_pool = self.op.uid_pool
3671

    
3672
    if self.op.default_iallocator is not None:
3673
      self.cluster.default_iallocator = self.op.default_iallocator
3674

    
3675
    if self.op.reserved_lvs is not None:
3676
      self.cluster.reserved_lvs = self.op.reserved_lvs
3677

    
3678
    def helper_os(aname, mods, desc):
3679
      desc += " OS list"
3680
      lst = getattr(self.cluster, aname)
3681
      for key, val in mods:
3682
        if key == constants.DDM_ADD:
3683
          if val in lst:
3684
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3685
          else:
3686
            lst.append(val)
3687
        elif key == constants.DDM_REMOVE:
3688
          if val in lst:
3689
            lst.remove(val)
3690
          else:
3691
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3692
        else:
3693
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3694

    
3695
    if self.op.hidden_os:
3696
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3697

    
3698
    if self.op.blacklisted_os:
3699
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3700

    
3701
    if self.op.master_netdev:
3702
      master = self.cfg.GetMasterNode()
3703
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3704
                  self.cluster.master_netdev)
3705
      result = self.rpc.call_node_deactivate_master_ip(master)
3706
      result.Raise("Could not disable the master ip")
3707
      feedback_fn("Changing master_netdev from %s to %s" %
3708
                  (self.cluster.master_netdev, self.op.master_netdev))
3709
      self.cluster.master_netdev = self.op.master_netdev
3710

    
3711
    if self.op.master_netmask:
3712
      master = self.cfg.GetMasterNode()
3713
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3714
      result = self.rpc.call_node_change_master_netmask(master,
3715
                                                        self.op.master_netmask)
3716
      if result.fail_msg:
3717
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
3718
        self.LogWarning(msg)
3719
        feedback_fn(msg)
3720
      else:
3721
        self.cluster.master_netmask = self.op.master_netmask
3722

    
3723
    self.cfg.Update(self.cluster, feedback_fn)
3724

    
3725
    if self.op.master_netdev:
3726
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3727
                  self.op.master_netdev)
3728
      result = self.rpc.call_node_activate_master_ip(master)
3729
      if result.fail_msg:
3730
        self.LogWarning("Could not re-enable the master ip on"
3731
                        " the master, please restart manually: %s",
3732
                        result.fail_msg)
3733

    
3734

    
3735
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)
3747

    
3748

    
3749
def _ComputeAncillaryFiles(cluster, redist):
3750
  """Compute files external to Ganeti which need to be consistent.
3751

3752
  @type redist: boolean
3753
  @param redist: Whether to include files which need to be redistributed
3754

3755
  """
3756
  # Compute files for all nodes
3757
  files_all = set([
3758
    constants.SSH_KNOWN_HOSTS_FILE,
3759
    constants.CONFD_HMAC_KEY,
3760
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3761
    constants.SPICE_CERT_FILE,
3762
    constants.SPICE_CACERT_FILE,
3763
    constants.RAPI_USERS_FILE,
3764
    ])
3765

    
3766
  if not redist:
3767
    files_all.update(constants.ALL_CERT_FILES)
3768
    files_all.update(ssconf.SimpleStore().GetFileList())
3769
  else:
3770
    # we need to ship at least the RAPI certificate
3771
    files_all.add(constants.RAPI_CERT_FILE)
3772

    
3773
  if cluster.modify_etc_hosts:
3774
    files_all.add(constants.ETC_HOSTS)
3775

    
3776
  # Files which are optional, these must:
3777
  # - be present in one other category as well
3778
  # - either exist or not exist on all nodes of that category (mc, vm all)
3779
  files_opt = set([
3780
    constants.RAPI_USERS_FILE,
3781
    ])
3782

    
3783
  # Files which should only be on master candidates
3784
  files_mc = set()
3785
  if not redist:
3786
    files_mc.add(constants.CLUSTER_CONF_FILE)
3787

    
3788
  # Files which should only be on VM-capable nodes
3789
  files_vm = set(filename
3790
    for hv_name in cluster.enabled_hypervisors
3791
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3792

    
3793
  files_opt |= set(filename
3794
    for hv_name in cluster.enabled_hypervisors
3795
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3796

    
3797
  # Filenames in each category must be unique
3798
  all_files_set = files_all | files_mc | files_vm
3799
  assert (len(all_files_set) ==
3800
          sum(map(len, [files_all, files_mc, files_vm]))), \
3801
         "Found file listed in more than one file list"
3802

    
3803
  # Optional files must be present in one other category
3804
  assert all_files_set.issuperset(files_opt), \
3805
         "Optional file not in a different required list"
3806

    
3807
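  # Note: this (files_all, files_opt, files_mc, files_vm) tuple is what
  # _RedistributeAncillaryFiles below uses to decide which files are pushed
  # to which set of nodes during a configuration redistribution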
  return (files_all, files_opt, files_mc, files_vm)
3808

    
3809

    
3810
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3811
  """Distribute additional files which are part of the cluster configuration.
3812

3813
  ConfigWriter takes care of distributing the config and ssconf files, but
3814
  there are more files which should be distributed to all nodes. This function
3815
  makes sure those are copied.
3816

3817
  @param lu: calling logical unit
3818
  @param additional_nodes: list of nodes not in the config to distribute to
3819
  @type additional_vm: boolean
3820
  @param additional_vm: whether the additional nodes are vm-capable or not
3821

3822
  """
3823
  # Gather target nodes
3824
  cluster = lu.cfg.GetClusterInfo()
3825
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3826

    
3827
  online_nodes = lu.cfg.GetOnlineNodeList()
3828
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3829

    
3830
  if additional_nodes is not None:
3831
    online_nodes.extend(additional_nodes)
3832
    if additional_vm:
3833
      vm_nodes.extend(additional_nodes)
3834

    
3835
  # Never distribute to master node
3836
  for nodelist in [online_nodes, vm_nodes]:
3837
    if master_info.name in nodelist:
3838
      nodelist.remove(master_info.name)
3839

    
3840
  # Gather file lists
3841
  (files_all, _, files_mc, files_vm) = \
3842
    _ComputeAncillaryFiles(cluster, True)
3843

    
3844
  # Never re-distribute configuration file from here
3845
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3846
              constants.CLUSTER_CONF_FILE in files_vm)
3847
  assert not files_mc, "Master candidates not handled in this function"
3848

    
3849
  filemap = [
3850
    (online_nodes, files_all),
3851
    (vm_nodes, files_vm),
3852
    ]
3853

    
3854
  # Upload the files
3855
  for (node_list, files) in filemap:
3856
    for fname in files:
3857
      _UploadHelper(lu, node_list, fname)
3858

    
3859

    
3860
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master = self.cfg.GetMasterNode()
    self.rpc.call_node_activate_master_ip(master)


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master = self.cfg.GetMasterNode()
    self.rpc.call_node_deactivate_master_ip(master)
3904

    
3905

    
3906
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3907
  """Sleep and poll for an instance's disk to sync.
3908

3909
  """
3910
  if not instance.disks or disks is not None and not disks:
3911
    return True
3912

    
3913
  disks = _ExpandCheckDisks(instance, disks)
3914

    
3915
  if not oneshot:
3916
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3917

    
3918
  node = instance.primary_node
3919

    
3920
  for dev in disks:
3921
    lu.cfg.SetDiskID(dev, node)
3922

    
3923
  # TODO: Convert to utils.Retry
3924

    
3925
  retries = 0
3926
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3927
  while True:
3928
    max_time = 0
3929
    done = True
3930
    cumul_degraded = False
3931
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3932
    msg = rstats.fail_msg
3933
    if msg:
3934
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3935
      retries += 1
3936
      if retries >= 10:
3937
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3938
                                 " aborting." % node)
3939
      time.sleep(6)
3940
      continue
3941
    rstats = rstats.payload
3942
    retries = 0
3943
    for i, mstat in enumerate(rstats):
3944
      if mstat is None:
3945
        lu.LogWarning("Can't compute data for node %s/%s",
3946
                           node, disks[i].iv_name)
3947
        continue
3948

    
3949
      cumul_degraded = (cumul_degraded or
3950
                        (mstat.is_degraded and mstat.sync_percent is None))
3951
      if mstat.sync_percent is not None:
3952
        done = False
3953
        if mstat.estimated_time is not None:
3954
          rem_time = ("%s remaining (estimated)" %
3955
                      utils.FormatSeconds(mstat.estimated_time))
3956
          max_time = mstat.estimated_time
3957
        else:
3958
          rem_time = "no time estimate"
3959
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3960
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3961

    
3962
    # if we're done but degraded, let's do a few small retries, to
3963
    # make sure we see a stable and not transient situation; therefore
3964
    # we force restart of the loop
3965
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3966
      logging.info("Degraded disks found, %d retries left", degr_retries)
3967
      degr_retries -= 1
3968
      time.sleep(1)
3969
      continue
3970

    
3971
    if done or oneshot:
3972
      break
3973

    
3974
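    # wait before polling again, using the reported time estimate but never
    # sleeping for more than a minute at a time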
    time.sleep(min(60, max_time))
3975

    
3976
  if done:
3977
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3978
  return not cumul_degraded
3979

    
3980

    
3981
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3982
  """Check that mirrors are not degraded.
3983

3984
  The ldisk parameter, if True, will change the test from the
3985
  is_degraded attribute (which represents overall non-ok status for
3986
  the device(s)) to the ldisk (representing the local storage status).
3987

3988
  """
3989
  lu.cfg.SetDiskID(dev, node)
3990

    
3991
  result = True
3992

    
3993
  if on_primary or dev.AssembleOnSecondary():
3994
    rstats = lu.rpc.call_blockdev_find(node, dev)
3995
    msg = rstats.fail_msg
3996
    if msg:
3997
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3998
      result = False
3999
    elif not rstats.payload:
4000
      lu.LogWarning("Can't find disk on node %s", node)
4001
      result = False
4002
    else:
4003
      if ldisk:
4004
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4005
      else:
4006
        result = result and not rstats.payload.is_degraded
4007

    
4008
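  # recurse into the child devices (e.g. the local LVs backing a DRBD disk),
  # so that an inconsistent child also marks the whole device as inconsistent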
  if dev.children:
4009
    for child in dev.children:
4010
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4011

    
4012
  return result
4013

    
4014

    
4015
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
4020
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4021

    
4022
  def ExpandNames(self):
4023
    """Gather locks we need.
4024

4025
    """
4026
    if self.op.node_names:
4027
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4028
      lock_names = self.op.node_names
4029
    else:
4030
      lock_names = locking.ALL_SET
4031

    
4032
    self.needed_locks = {
4033
      locking.LEVEL_NODE: lock_names,
4034
      }
4035

    
4036
  def CheckPrereq(self):
4037
    """Check prerequisites.
4038

4039
    This checks:
4040
     - the node exists in the configuration
4041
     - OOB is supported
4042

4043
    Any errors are signaled by raising errors.OpPrereqError.
4044

4045
    """
4046
    self.nodes = []
4047
    self.master_node = self.cfg.GetMasterNode()
4048

    
4049
    assert self.op.power_delay >= 0.0
4050

    
4051
    if self.op.node_names:
4052
      if (self.op.command in self._SKIP_MASTER and
4053
          self.master_node in self.op.node_names):
4054
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4055
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4056

    
4057
        if master_oob_handler:
4058
          additional_text = ("run '%s %s %s' if you want to operate on the"
4059
                             " master regardless") % (master_oob_handler,
4060
                                                      self.op.command,
4061
                                                      self.master_node)
4062
        else:
4063
          additional_text = "it does not support out-of-band operations"
4064

    
4065
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4066
                                    " allowed for %s; %s") %
4067
                                   (self.master_node, self.op.command,
4068
                                    additional_text), errors.ECODE_INVAL)
4069
    else:
4070
      self.op.node_names = self.cfg.GetNodeList()
4071
      if self.op.command in self._SKIP_MASTER:
4072
        self.op.node_names.remove(self.master_node)
4073

    
4074
    if self.op.command in self._SKIP_MASTER:
4075
      assert self.master_node not in self.op.node_names
4076

    
4077
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4078
      if node is None:
4079
        raise errors.OpPrereqError("Node %s not found" % node_name,
4080
                                   errors.ECODE_NOENT)
4081
      else:
4082
        self.nodes.append(node)
4083

    
4084
      if (not self.op.ignore_status and
4085
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4086
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4087
                                    " not marked offline") % node_name,
4088
                                   errors.ECODE_STATE)
4089

    
4090
  def Exec(self, feedback_fn):
4091
    """Execute OOB and return result if we expect any.
4092

4093
    """
4094
    master_node = self.master_node
4095
    ret = []
4096

    
4097
    for idx, node in enumerate(utils.NiceSort(self.nodes,
4098
                                              key=lambda node: node.name)):
4099
      node_entry = [(constants.RS_NORMAL, node.name)]
4100
      ret.append(node_entry)
4101

    
4102
      oob_program = _SupportsOob(self.cfg, node)
4103

    
4104
      if not oob_program:
4105
        node_entry.append((constants.RS_UNAVAIL, None))
4106
        continue
4107

    
4108
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
4109
                   self.op.command, oob_program, node.name)
4110
      result = self.rpc.call_run_oob(master_node, oob_program,
4111
                                     self.op.command, node.name,
4112
                                     self.op.timeout)
4113

    
4114
      if result.fail_msg:
4115
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4116
                        node.name, result.fail_msg)
4117
        node_entry.append((constants.RS_NODATA, None))
4118
      else:
4119
        try:
4120
          self._CheckPayload(result)
4121
        except errors.OpExecError, err:
4122
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
4123
                          node.name, err)
4124
          node_entry.append((constants.RS_NODATA, None))
4125
        else:
4126
          if self.op.command == constants.OOB_HEALTH:
4127
            # For health we should log important events
4128
            for item, status in result.payload:
4129
              if status in [constants.OOB_STATUS_WARNING,
4130
                            constants.OOB_STATUS_CRITICAL]:
4131
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
4132
                                item, node.name, status)
4133

    
4134
          if self.op.command == constants.OOB_POWER_ON:
4135
            node.powered = True
4136
          elif self.op.command == constants.OOB_POWER_OFF:
4137
            node.powered = False
4138
          elif self.op.command == constants.OOB_POWER_STATUS:
4139
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4140
            if powered != node.powered:
4141
              logging.warning(("Recorded power state (%s) of node '%s' does not"
4142
                               " match actual power state (%s)"), node.powered,
4143
                              node.name, powered)
4144

    
4145
          # For configuration changing commands we should update the node
4146
          if self.op.command in (constants.OOB_POWER_ON,
4147
                                 constants.OOB_POWER_OFF):
4148
            self.cfg.Update(node, feedback_fn)
4149

    
4150
          node_entry.append((constants.RS_NORMAL, result.payload))
4151

    
4152
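          # when powering on several nodes, stagger consecutive power-on
          # commands by the requested delay (not needed after the last node)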
          if (self.op.command == constants.OOB_POWER_ON and
4153
              idx < len(self.nodes) - 1):
4154
            time.sleep(self.op.power_delay)
4155

    
4156
    return ret
4157

    
4158
  def _CheckPayload(self, result):
4159
    """Checks if the payload is valid.
4160

4161
    @param result: RPC result
4162
    @raises errors.OpExecError: If payload is not valid
4163

4164
    """
4165
    errs = []
4166
    if self.op.command == constants.OOB_HEALTH:
4167
      if not isinstance(result.payload, list):
4168
        errs.append("command 'health' is expected to return a list but got %s" %
4169
                    type(result.payload))
4170
      else:
4171
        for item, status in result.payload:
4172
          if status not in constants.OOB_STATUSES:
4173
            errs.append("health item '%s' has invalid status '%s'" %
4174
                        (item, status))
4175

    
4176
    if self.op.command == constants.OOB_POWER_STATUS:
4177
      if not isinstance(result.payload, dict):
4178
        errs.append("power-status is expected to return a dict but got %s" %
4179
                    type(result.payload))
4180

    
4181
    if self.op.command in [
4182
        constants.OOB_POWER_ON,
4183
        constants.OOB_POWER_OFF,
4184
        constants.OOB_POWER_CYCLE,
4185
        ]:
4186
      if result.payload is not None:
4187
        errs.append("%s is expected to not return payload but got '%s'" %
4188
                    (self.op.command, result.payload))
4189

    
4190
    if errs:
4191
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4192
                               utils.CommaJoin(errs))
4193

    
4194

    
4195
class _OsQuery(_QueryBase):
4196
  FIELDS = query.OS_FIELDS
4197

    
4198
  def ExpandNames(self, lu):
4199
    # Lock all nodes in shared mode
4200
    # Temporary removal of locks, should be reverted later
4201
    # TODO: reintroduce locks when they are lighter-weight
4202
    lu.needed_locks = {}
4203
    #self.share_locks[locking.LEVEL_NODE] = 1
4204
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4205

    
4206
    # The following variables interact with _QueryBase._GetNames
4207
    if self.names:
4208
      self.wanted = self.names
4209
    else:
4210
      self.wanted = locking.ALL_SET
4211

    
4212
    self.do_locking = self.use_locking
4213

    
4214
  def DeclareLocks(self, lu, level):
4215
    pass
4216

    
4217
  @staticmethod
4218
  def _DiagnoseByOS(rlist):
4219
    """Remaps a per-node return list into an a per-os per-node dictionary
4220

4221
    @param rlist: a map with node names as keys and OS objects as values
4222

4223
    @rtype: dict
4224
    @return: a dictionary with osnames as keys and as value another
4225
        map, with nodes as keys and tuples of (path, status, diagnose,
4226
        variants, parameters, api_versions) as values, eg::
4227

4228
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4229
                                     (/srv/..., False, "invalid api")],
4230
                           "node2": [(/srv/..., True, "", [], [])]}
4231
          }
4232

4233
    """
4234
    all_os = {}
4235
    # we build here the list of nodes that didn't fail the RPC (at RPC
4236
    # level), so that nodes with a non-responding node daemon don't
4237
    # make all OSes invalid
4238
    good_nodes = [node_name for node_name in rlist
4239
                  if not rlist[node_name].fail_msg]
4240
    for node_name, nr in rlist.items():
4241
      if nr.fail_msg or not nr.payload:
4242
        continue
4243
      for (name, path, status, diagnose, variants,
4244
           params, api_versions) in nr.payload:
4245
        if name not in all_os:
4246
          # build a list of nodes for this os containing empty lists
4247
          # for each node in node_list
4248
          all_os[name] = {}
4249
          for nname in good_nodes:
4250
            all_os[name][nname] = []
4251
        # convert params from [name, help] to (name, help)
4252
        params = [tuple(v) for v in params]
4253
        all_os[name][node_name].append((path, status, diagnose,
4254
                                        variants, params, api_versions))
4255
    return all_os
4256

    
4257
  def _GetQueryData(self, lu):
4258
    """Computes the list of nodes and their attributes.
4259

4260
    """
4261
    # Locking is not used
4262
    assert not (compat.any(lu.glm.is_owned(level)
4263
                           for level in locking.LEVELS
4264
                           if level != locking.LEVEL_CLUSTER) or
4265
                self.do_locking or self.use_locking)
4266

    
4267
    valid_nodes = [node.name
4268
                   for node in lu.cfg.GetAllNodesInfo().values()
4269
                   if not node.offline and node.vm_capable]
4270
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4271
    cluster = lu.cfg.GetClusterInfo()
4272

    
4273
    data = {}
4274

    
4275
    for (os_name, os_data) in pol.items():
4276
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4277
                          hidden=(os_name in cluster.hidden_os),
4278
                          blacklisted=(os_name in cluster.blacklisted_os))
4279

    
4280
      variants = set()
4281
      parameters = set()
4282
      api_versions = set()
4283

    
4284
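      # An OS is considered valid only if every node reports a valid first
      # entry; variants, parameters and API versions are reduced to the
      # intersection over all nodes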
      for idx, osl in enumerate(os_data.values()):
4285
        info.valid = bool(info.valid and osl and osl[0][1])
4286
        if not info.valid:
4287
          break
4288

    
4289
        (node_variants, node_params, node_api) = osl[0][3:6]
4290
        if idx == 0:
4291
          # First entry
4292
          variants.update(node_variants)
4293
          parameters.update(node_params)
4294
          api_versions.update(node_api)
4295
        else:
4296
          # Filter out inconsistent values
4297
          variants.intersection_update(node_variants)
4298
          parameters.intersection_update(node_params)
4299
          api_versions.intersection_update(node_api)
4300

    
4301
      info.variants = list(variants)
4302
      info.parameters = list(parameters)
4303
      info.api_versions = list(api_versions)
4304

    
4305
      data[os_name] = info
4306

    
4307
    # Prepare data in requested order
4308
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4309
            if name in data]
4310

    
4311

    
4312
class LUOsDiagnose(NoHooksLU):
4313
  """Logical unit for OS diagnose/query.
4314

4315
  """
4316
  REQ_BGL = False
4317

    
4318
  @staticmethod
4319
  def _BuildFilter(fields, names):
4320
    """Builds a filter for querying OSes.
4321

4322
    """
4323
    name_filter = qlang.MakeSimpleFilter("name", names)
4324

    
4325
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4326
    # respective field is not requested
4327
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4328
                     for fname in ["hidden", "blacklisted"]
4329
                     if fname not in fields]
4330
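    # "valid" needs the opposite polarity: keep only OSes for which it is true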
    if "valid" not in fields:
4331
      status_filter.append([qlang.OP_TRUE, "valid"])
4332

    
4333
    if status_filter:
4334
      status_filter.insert(0, qlang.OP_AND)
4335
    else:
4336
      status_filter = None
4337

    
4338
    if name_filter and status_filter:
4339
      return [qlang.OP_AND, name_filter, status_filter]
4340
    elif name_filter:
4341
      return name_filter
4342
    else:
4343
      return status_filter
4344

    
4345
  def CheckArguments(self):
4346
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4347
                       self.op.output_fields, False)
4348

    
4349
  def ExpandNames(self):
4350
    self.oq.ExpandNames(self)
4351

    
4352
  def Exec(self, feedback_fn):
4353
    return self.oq.OldStyleQuery(self)
4354

    
4355

    
4356
class LUNodeRemove(LogicalUnit):
4357
  """Logical unit for removing a node.
4358

4359
  """
4360
  HPATH = "node-remove"
4361
  HTYPE = constants.HTYPE_NODE
4362

    
4363
  def BuildHooksEnv(self):
4364
    """Build hooks env.
4365

4366
    This doesn't run on the target node in the pre phase as a failed
4367
    node would then be impossible to remove.
4368

4369
    """
4370
    return {
4371
      "OP_TARGET": self.op.node_name,
4372
      "NODE_NAME": self.op.node_name,
4373
      }
4374

    
4375
  def BuildHooksNodes(self):
4376
    """Build hooks nodes.
4377

4378
    """
4379
    all_nodes = self.cfg.GetNodeList()
4380
    try:
4381
      all_nodes.remove(self.op.node_name)
4382
    except ValueError:
4383
      logging.warning("Node '%s', which is about to be removed, was not found"
4384
                      " in the list of all nodes", self.op.node_name)
4385
    return (all_nodes, all_nodes)
4386

    
4387
  def CheckPrereq(self):
4388
    """Check prerequisites.
4389

4390
    This checks:
4391
     - the node exists in the configuration
4392
     - it does not have primary or secondary instances
4393
     - it's not the master
4394

4395
    Any errors are signaled by raising errors.OpPrereqError.
4396

4397
    """
4398
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4399
    node = self.cfg.GetNodeInfo(self.op.node_name)
4400
    assert node is not None
4401

    
4402
    masternode = self.cfg.GetMasterNode()
4403
    if node.name == masternode:
4404
      raise errors.OpPrereqError("Node is the master node, failover to another"
4405
                                 " node is required", errors.ECODE_INVAL)
4406

    
4407
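    # The node must not be the primary or a secondary node of any instance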
    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4408
      if node.name in instance.all_nodes:
4409
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4410
                                   " please remove first" % instance_name,
4411
                                   errors.ECODE_INVAL)
4412
    self.op.node_name = node.name
4413
    self.node = node
4414

    
4415
  def Exec(self, feedback_fn):
4416
    """Removes the node from the cluster.
4417

4418
    """
4419
    node = self.node
4420
    logging.info("Stopping the node daemon and removing configs from node %s",
4421
                 node.name)
4422

    
4423
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4424

    
4425
    # Promote nodes to master candidate as needed
4426
    _AdjustCandidatePool(self, exceptions=[node.name])
4427
    self.context.RemoveNode(node.name)
4428

    
4429
    # Run post hooks on the node before it's removed
4430
    _RunPostHook(self, node.name)
4431

    
4432
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4433
    msg = result.fail_msg
4434
    if msg:
4435
      self.LogWarning("Errors encountered on the remote node while leaving"
4436
                      " the cluster: %s", msg)
4437

    
4438
    # Remove node from our /etc/hosts
4439
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4440
      master_node = self.cfg.GetMasterNode()
4441
      result = self.rpc.call_etc_hosts_modify(master_node,
4442
                                              constants.ETC_HOSTS_REMOVE,
4443
                                              node.name, None)
4444
      result.Raise("Can't update hosts file with new host data")
4445
      _RedistributeAncillaryFiles(self)
4446

    
4447

    
4448
class _NodeQuery(_QueryBase):
4449
  FIELDS = query.NODE_FIELDS
4450

    
4451
  def ExpandNames(self, lu):
4452
    lu.needed_locks = {}
4453
    lu.share_locks = _ShareAll()
4454

    
4455
    if self.names:
4456
      self.wanted = _GetWantedNodes(lu, self.names)
4457
    else:
4458
      self.wanted = locking.ALL_SET
4459

    
4460
    self.do_locking = (self.use_locking and
4461
                       query.NQ_LIVE in self.requested_data)
4462

    
4463
    if self.do_locking:
4464
      # If any non-static field is requested we need to lock the nodes
4465
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4466

    
4467
  def DeclareLocks(self, lu, level):
4468
    pass
4469

    
4470
  def _GetQueryData(self, lu):
4471
    """Computes the list of nodes and their attributes.
4472

4473
    """
4474
    all_info = lu.cfg.GetAllNodesInfo()
4475

    
4476
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4477

    
4478
    # Gather data as requested
4479
    if query.NQ_LIVE in self.requested_data:
4480
      # filter out non-vm_capable nodes
4481
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4482

    
4483
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4484
                                        lu.cfg.GetHypervisorType())
4485
      live_data = dict((name, nresult.payload)
4486
                       for (name, nresult) in node_data.items()
4487
                       if not nresult.fail_msg and nresult.payload)
4488
    else:
4489
      live_data = None
4490

    
4491
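    # Build reverse mappings from each node to the instances for which it is
    # the primary or a secondary node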
    if query.NQ_INST in self.requested_data:
4492
      node_to_primary = dict([(name, set()) for name in nodenames])
4493
      node_to_secondary = dict([(name, set()) for name in nodenames])
4494

    
4495
      inst_data = lu.cfg.GetAllInstancesInfo()
4496

    
4497
      for inst in inst_data.values():
4498
        if inst.primary_node in node_to_primary:
4499
          node_to_primary[inst.primary_node].add(inst.name)
4500
        for secnode in inst.secondary_nodes:
4501
          if secnode in node_to_secondary:
4502
            node_to_secondary[secnode].add(inst.name)
4503
    else:
4504
      node_to_primary = None
4505
      node_to_secondary = None
4506

    
4507
    if query.NQ_OOB in self.requested_data:
4508
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4509
                         for name, node in all_info.iteritems())
4510
    else:
4511
      oob_support = None
4512

    
4513
    if query.NQ_GROUP in self.requested_data:
4514
      groups = lu.cfg.GetAllNodeGroupsInfo()
4515
    else:
4516
      groups = {}
4517

    
4518
    return query.NodeQueryData([all_info[name] for name in nodenames],
4519
                               live_data, lu.cfg.GetMasterNode(),
4520
                               node_to_primary, node_to_secondary, groups,
4521
                               oob_support, lu.cfg.GetClusterInfo())
4522

    
4523

    
4524
class LUNodeQuery(NoHooksLU):
4525
  """Logical unit for querying nodes.
4526

4527
  """
4528
  # pylint: disable=W0142
4529
  REQ_BGL = False
4530

    
4531
  def CheckArguments(self):
4532
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4533
                         self.op.output_fields, self.op.use_locking)
4534

    
4535
  def ExpandNames(self):
4536
    self.nq.ExpandNames(self)
4537

    
4538
  def Exec(self, feedback_fn):
4539
    return self.nq.OldStyleQuery(self)
4540

    
4541

    
4542
class LUNodeQueryvols(NoHooksLU):
4543
  """Logical unit for getting volumes on node(s).
4544

4545
  """
4546
  REQ_BGL = False
4547
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4548
  _FIELDS_STATIC = utils.FieldSet("node")
4549

    
4550
  def CheckArguments(self):
4551
    _CheckOutputFields(static=self._FIELDS_STATIC,
4552
                       dynamic=self._FIELDS_DYNAMIC,
4553
                       selected=self.op.output_fields)
4554

    
4555
  def ExpandNames(self):
4556
    self.needed_locks = {}
4557
    self.share_locks[locking.LEVEL_NODE] = 1
4558
    if not self.op.nodes:
4559
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4560
    else:
4561
      self.needed_locks[locking.LEVEL_NODE] = \
4562
        _GetWantedNodes(self, self.op.nodes)
4563

    
4564
  def Exec(self, feedback_fn):
4565
    """Computes the list of nodes and their attributes.
4566

4567
    """
4568
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4569
    volumes = self.rpc.call_node_volumes(nodenames)
4570

    
4571
    ilist = self.cfg.GetAllInstancesInfo()
4572
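    # Map (node, volume name) pairs to the owning instance so the "instance"
    # field can be filled in below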
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4573

    
4574
    output = []
4575
    for node in nodenames:
4576
      nresult = volumes[node]
4577
      if nresult.offline:
4578
        continue
4579
      msg = nresult.fail_msg
4580
      if msg:
4581
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4582
        continue
4583

    
4584
      node_vols = sorted(nresult.payload,
4585
                         key=operator.itemgetter("dev"))
4586

    
4587
      for vol in node_vols:
4588
        node_output = []
4589
        for field in self.op.output_fields:
4590
          if field == "node":
4591
            val = node
4592
          elif field == "phys":
4593
            val = vol["dev"]
4594
          elif field == "vg":
4595
            val = vol["vg"]
4596
          elif field == "name":
4597
            val = vol["name"]
4598
          elif field == "size":
4599
            val = int(float(vol["size"]))
4600
          elif field == "instance":
4601
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4602
          else:
4603
            raise errors.ParameterError(field)
4604
          node_output.append(str(val))
4605

    
4606
        output.append(node_output)
4607

    
4608
    return output
4609

    
4610

    
4611
class LUNodeQueryStorage(NoHooksLU):
4612
  """Logical unit for getting information on storage units on node(s).
4613

4614
  """
4615
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4616
  REQ_BGL = False
4617

    
4618
  def CheckArguments(self):
4619
    _CheckOutputFields(static=self._FIELDS_STATIC,
4620
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4621
                       selected=self.op.output_fields)
4622

    
4623
  def ExpandNames(self):
4624
    self.needed_locks = {}
4625
    self.share_locks[locking.LEVEL_NODE] = 1
4626

    
4627
    if self.op.nodes:
4628
      self.needed_locks[locking.LEVEL_NODE] = \
4629
        _GetWantedNodes(self, self.op.nodes)
4630
    else:
4631
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4632

    
4633
  def Exec(self, feedback_fn):
4634
    """Computes the list of nodes and their attributes.
4635

4636
    """
4637
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4638

    
4639
    # Always get name to sort by
4640
    if constants.SF_NAME in self.op.output_fields:
4641
      fields = self.op.output_fields[:]
4642
    else:
4643
      fields = [constants.SF_NAME] + self.op.output_fields
4644

    
4645
    # Never ask for node or type as it's only known to the LU
4646
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4647
      while extra in fields:
4648
        fields.remove(extra)
4649

    
4650
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4651
    name_idx = field_idx[constants.SF_NAME]
4652

    
4653
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4654
    data = self.rpc.call_storage_list(self.nodes,
4655
                                      self.op.storage_type, st_args,
4656
                                      self.op.name, fields)
4657

    
4658
    result = []
4659

    
4660
    for node in utils.NiceSort(self.nodes):
4661
      nresult = data[node]
4662
      if nresult.offline:
4663
        continue
4664

    
4665
      msg = nresult.fail_msg
4666
      if msg:
4667
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4668
        continue
4669

    
4670
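      # Index the payload rows by storage unit name so the output can be
      # emitted in name order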
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4671

    
4672
      for name in utils.NiceSort(rows.keys()):
4673
        row = rows[name]
4674

    
4675
        out = []
4676

    
4677
        for field in self.op.output_fields:
4678
          if field == constants.SF_NODE:
4679
            val = node
4680
          elif field == constants.SF_TYPE:
4681
            val = self.op.storage_type
4682
          elif field in field_idx:
4683
            val = row[field_idx[field]]
4684
          else:
4685
            raise errors.ParameterError(field)
4686

    
4687
          out.append(val)
4688

    
4689
        result.append(out)
4690

    
4691
    return result
4692

    
4693

    
4694
class _InstanceQuery(_QueryBase):
4695
  FIELDS = query.INSTANCE_FIELDS
4696

    
4697
  def ExpandNames(self, lu):
4698
    lu.needed_locks = {}
4699
    lu.share_locks = _ShareAll()
4700

    
4701
    if self.names:
4702
      self.wanted = _GetWantedInstances(lu, self.names)
4703
    else:
4704
      self.wanted = locking.ALL_SET
4705

    
4706
    self.do_locking = (self.use_locking and
4707
                       query.IQ_LIVE in self.requested_data)
4708
    if self.do_locking:
4709
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4710
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4711
      lu.needed_locks[locking.LEVEL_NODE] = []
4712
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4713

    
4714
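    # Node group data (and thus node group locks) is only needed when locking
    # is in use and node information (IQ_NODES) was requested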
    self.do_grouplocks = (self.do_locking and
4715
                          query.IQ_NODES in self.requested_data)
4716

    
4717
  def DeclareLocks(self, lu, level):
4718
    if self.do_locking:
4719
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4720
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4721

    
4722
        # Lock all groups used by instances optimistically; this requires going
4723
        # via the node before it's locked, requiring verification later on
4724
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4725
          set(group_uuid
4726
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4727
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4728
      elif level == locking.LEVEL_NODE:
4729
        lu._LockInstancesNodes() # pylint: disable=W0212
4730

    
4731
  @staticmethod
4732
  def _CheckGroupLocks(lu):
4733
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4734
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4735

    
4736
    # Check if node groups for locked instances are still correct
4737
    for instance_name in owned_instances:
4738
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4739

    
4740
  def _GetQueryData(self, lu):
4741
    """Computes the list of instances and their attributes.
4742

4743
    """
4744
    if self.do_grouplocks:
4745
      self._CheckGroupLocks(lu)
4746

    
4747
    cluster = lu.cfg.GetClusterInfo()
4748
    all_info = lu.cfg.GetAllInstancesInfo()
4749

    
4750
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4751

    
4752
    instance_list = [all_info[name] for name in instance_names]
4753
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4754
                                        for inst in instance_list)))
4755
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4756
    bad_nodes = []
4757
    offline_nodes = []
4758
    wrongnode_inst = set()
4759

    
4760
    # Gather data as requested
4761
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4762
      live_data = {}
4763
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4764
      for name in nodes:
4765
        result = node_data[name]
4766
        if result.offline:
4767
          # offline nodes will be in both lists
4768
          assert result.fail_msg
4769
          offline_nodes.append(name)
4770
        if result.fail_msg:
4771
          bad_nodes.append(name)
4772
        elif result.payload:
4773
          for inst in result.payload:
4774
            if inst in all_info:
4775
              if all_info[inst].primary_node == name:
4776
                live_data.update(result.payload)
4777
              else:
4778
                wrongnode_inst.add(inst)
4779
            else:
4780
              # orphan instance; we don't list it here as we don't
4781
              # handle this case yet in the output of instance listing
4782
              logging.warning("Orphan instance '%s' found on node %s",
4783
                              inst, name)
4784
        # else no instance is alive
4785
    else:
4786
      live_data = {}
4787

    
4788
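    # Disk usage is computed from the configured disk sizes and disk template
    # rather than queried live from the nodes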
    if query.IQ_DISKUSAGE in self.requested_data:
4789
      disk_usage = dict((inst.name,
4790
                         _ComputeDiskSize(inst.disk_template,
4791
                                          [{constants.IDISK_SIZE: disk.size}
4792
                                           for disk in inst.disks]))
4793
                        for inst in instance_list)
4794
    else:
4795
      disk_usage = None
4796

    
4797
    if query.IQ_CONSOLE in self.requested_data:
4798
      consinfo = {}
4799
      for inst in instance_list:
4800
        if inst.name in live_data:
4801
          # Instance is running
4802
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4803
        else:
4804
          consinfo[inst.name] = None
4805
      assert set(consinfo.keys()) == set(instance_names)
4806
    else:
4807
      consinfo = None
4808

    
4809
    if query.IQ_NODES in self.requested_data:
4810
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4811
                                            instance_list)))
4812
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4813
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4814
                    for uuid in set(map(operator.attrgetter("group"),
4815
                                        nodes.values())))
4816
    else:
4817
      nodes = None
4818
      groups = None
4819

    
4820
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4821
                                   disk_usage, offline_nodes, bad_nodes,
4822
                                   live_data, wrongnode_inst, consinfo,
4823
                                   nodes, groups)
4824

    
4825

    
4826
class LUQuery(NoHooksLU):
4827
  """Query for resources/items of a certain kind.
4828

4829
  """
4830
  # pylint: disable=W0142
4831
  REQ_BGL = False
4832

    
4833
  def CheckArguments(self):
4834
    qcls = _GetQueryImplementation(self.op.what)
4835

    
4836
    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4837

    
4838
  def ExpandNames(self):
4839
    self.impl.ExpandNames(self)
4840

    
4841
  def DeclareLocks(self, level):
4842
    self.impl.DeclareLocks(self, level)
4843

    
4844
  def Exec(self, feedback_fn):
4845
    return self.impl.NewStyleQuery(self)
4846

    
4847

    
4848
class LUQueryFields(NoHooksLU):
4849
  """Query for resources/items of a certain kind.
4850

4851
  """
4852
  # pylint: disable=W0142
4853
  REQ_BGL = False
4854

    
4855
  def CheckArguments(self):
4856
    self.qcls = _GetQueryImplementation(self.op.what)
4857

    
4858
  def ExpandNames(self):
4859
    self.needed_locks = {}
4860

    
4861
  def Exec(self, feedback_fn):
4862
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4863

    
4864

    
4865
class LUNodeModifyStorage(NoHooksLU):
4866
  """Logical unit for modifying a storage volume on a node.
4867

4868
  """
4869
  REQ_BGL = False
4870

    
4871
  def CheckArguments(self):
4872
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4873

    
4874
    storage_type = self.op.storage_type
4875

    
4876
    try:
4877
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4878
    except KeyError:
4879
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4880
                                 " modified" % storage_type,
4881
                                 errors.ECODE_INVAL)
4882

    
4883
    diff = set(self.op.changes.keys()) - modifiable
4884
    if diff:
4885
      raise errors.OpPrereqError("The following fields can not be modified for"
4886
                                 " storage units of type '%s': %r" %
4887
                                 (storage_type, list(diff)),
4888
                                 errors.ECODE_INVAL)
4889

    
4890
  def ExpandNames(self):
4891
    self.needed_locks = {
4892
      locking.LEVEL_NODE: self.op.node_name,
4893
      }
4894

    
4895
  def Exec(self, feedback_fn):
4896
    """Computes the list of nodes and their attributes.
4897

4898
    """
4899
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4900
    result = self.rpc.call_storage_modify(self.op.node_name,
4901
                                          self.op.storage_type, st_args,
4902
                                          self.op.name, self.op.changes)
4903
    result.Raise("Failed to modify storage unit '%s' on %s" %
4904
                 (self.op.name, self.op.node_name))
4905

    
4906

    
4907
class LUNodeAdd(LogicalUnit):
4908
  """Logical unit for adding node to the cluster.
4909

4910
  """
4911
  HPATH = "node-add"
4912
  HTYPE = constants.HTYPE_NODE
4913
  _NFLAGS = ["master_capable", "vm_capable"]
4914

    
4915
  def CheckArguments(self):
4916
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4917
    # validate/normalize the node name
4918
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4919
                                         family=self.primary_ip_family)
4920
    self.op.node_name = self.hostname.name
4921

    
4922
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4923
      raise errors.OpPrereqError("Cannot readd the master node",
4924
                                 errors.ECODE_STATE)
4925

    
4926
    if self.op.readd and self.op.group:
4927
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4928
                                 " being readded", errors.ECODE_INVAL)
4929

    
4930
  def BuildHooksEnv(self):
4931
    """Build hooks env.
4932

4933
    This will run on all nodes before, and on all nodes + the new node after.
4934

4935
    """
4936
    return {
4937
      "OP_TARGET": self.op.node_name,
4938
      "NODE_NAME": self.op.node_name,
4939
      "NODE_PIP": self.op.primary_ip,
4940
      "NODE_SIP": self.op.secondary_ip,
4941
      "MASTER_CAPABLE": str(self.op.master_capable),
4942
      "VM_CAPABLE": str(self.op.vm_capable),
4943
      }
4944

    
4945
  def BuildHooksNodes(self):
4946
    """Build hooks nodes.
4947

4948
    """
4949
    # Exclude added node
4950
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4951
    post_nodes = pre_nodes + [self.op.node_name, ]
4952

    
4953
    return (pre_nodes, post_nodes)
4954

    
4955
  def CheckPrereq(self):
4956
    """Check prerequisites.
4957

4958
    This checks:
4959
     - the new node is not already in the config
4960
     - it is resolvable
4961
     - its parameters (single/dual homed) matches the cluster
4962

4963
    Any errors are signaled by raising errors.OpPrereqError.
4964

4965
    """
4966
    cfg = self.cfg
4967
    hostname = self.hostname
4968
    node = hostname.name
4969
    primary_ip = self.op.primary_ip = hostname.ip
4970
    if self.op.secondary_ip is None:
4971
      if self.primary_ip_family == netutils.IP6Address.family:
4972
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4973
                                   " IPv4 address must be given as secondary",
4974
                                   errors.ECODE_INVAL)
4975
      self.op.secondary_ip = primary_ip
4976

    
4977
    secondary_ip = self.op.secondary_ip
4978
    if not netutils.IP4Address.IsValid(secondary_ip):
4979
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4980
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4981

    
4982
    node_list = cfg.GetNodeList()
4983
    if not self.op.readd and node in node_list:
4984
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4985
                                 node, errors.ECODE_EXISTS)
4986
    elif self.op.readd and node not in node_list:
4987
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4988
                                 errors.ECODE_NOENT)
4989

    
4990
    self.changed_primary_ip = False
4991

    
4992
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4993
      if self.op.readd and node == existing_node_name:
4994
        if existing_node.secondary_ip != secondary_ip:
4995
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4996
                                     " address configuration as before",
4997
                                     errors.ECODE_INVAL)
4998
        if existing_node.primary_ip != primary_ip:
4999
          self.changed_primary_ip = True
5000

    
5001
        continue
5002

    
5003
      if (existing_node.primary_ip == primary_ip or
5004
          existing_node.secondary_ip == primary_ip or
5005
          existing_node.primary_ip == secondary_ip or
5006
          existing_node.secondary_ip == secondary_ip):
5007
        raise errors.OpPrereqError("New node ip address(es) conflict with"
5008
                                   " existing node %s" % existing_node.name,
5009
                                   errors.ECODE_NOTUNIQUE)
5010

    
5011
    # After this 'if' block, None is no longer a valid value for the
5012
    # _capable op attributes
5013
    if self.op.readd:
5014
      old_node = self.cfg.GetNodeInfo(node)
5015
      assert old_node is not None, "Can't retrieve locked node %s" % node
5016
      for attr in self._NFLAGS:
5017
        if getattr(self.op, attr) is None:
5018
          setattr(self.op, attr, getattr(old_node, attr))
5019
    else:
5020
      for attr in self._NFLAGS:
5021
        if getattr(self.op, attr) is None:
5022
          setattr(self.op, attr, True)
5023

    
5024
    if self.op.readd and not self.op.vm_capable:
5025
      pri, sec = cfg.GetNodeInstances(node)
5026
      if pri or sec:
5027
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5028
                                   " flag set to false, but it already holds"
5029
                                   " instances" % node,
5030
                                   errors.ECODE_STATE)
5031

    
5032
    # check that the type of the node (single versus dual homed) is the
5033
    # same as for the master
5034
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5035
    master_singlehomed = myself.secondary_ip == myself.primary_ip
5036
    newbie_singlehomed = secondary_ip == primary_ip
5037
    if master_singlehomed != newbie_singlehomed:
5038
      if master_singlehomed:
5039
        raise errors.OpPrereqError("The master has no secondary ip but the"
5040
                                   " new node has one",
5041
                                   errors.ECODE_INVAL)
5042
      else:
5043
        raise errors.OpPrereqError("The master has a secondary ip but the"
5044
                                   " new node doesn't have one",
5045
                                   errors.ECODE_INVAL)
5046

    
5047
    # checks reachability
5048
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5049
      raise errors.OpPrereqError("Node not reachable by ping",
5050
                                 errors.ECODE_ENVIRON)
5051

    
5052
    if not newbie_singlehomed:
5053
      # check reachability from my secondary ip to newbie's secondary ip
5054
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5055
                              source=myself.secondary_ip):
5056
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5057
                                   " based ping to node daemon port",
5058
                                   errors.ECODE_ENVIRON)
5059

    
5060
    if self.op.readd:
5061
      exceptions = [node]
5062
    else:
5063
      exceptions = []
5064

    
5065
    if self.op.master_capable:
5066
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5067
    else:
5068
      self.master_candidate = False
5069

    
5070
    if self.op.readd:
5071
      self.new_node = old_node
5072
    else:
5073
      node_group = cfg.LookupNodeGroup(self.op.group)
5074
      self.new_node = objects.Node(name=node,
5075
                                   primary_ip=primary_ip,
5076
                                   secondary_ip=secondary_ip,
5077
                                   master_candidate=self.master_candidate,
5078
                                   offline=False, drained=False,
5079
                                   group=node_group)
5080

    
5081
    if self.op.ndparams:
5082
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5083

    
5084
  def Exec(self, feedback_fn):
5085
    """Adds the new node to the cluster.
5086

5087
    """
5088
    new_node = self.new_node
5089
    node = new_node.name
5090

    
5091
    # We are adding a new node, so we assume it's powered
5092
    new_node.powered = True
5093

    
5094
    # for re-adds, reset the offline/drained/master-candidate flags;
5095
    # we need to reset here, otherwise offline would prevent RPC calls
5096
    # later in the procedure; this also means that if the re-add
5097
    # fails, we are left with a non-offlined, broken node
5098
    if self.op.readd:
5099
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5100
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5101
      # if we demote the node, we do cleanup later in the procedure
5102
      new_node.master_candidate = self.master_candidate
5103
      if self.changed_primary_ip:
5104
        new_node.primary_ip = self.op.primary_ip
5105

    
5106
    # copy the master/vm_capable flags
5107
    for attr in self._NFLAGS:
5108
      setattr(new_node, attr, getattr(self.op, attr))
5109

    
5110
    # notify the user about any possible mc promotion
5111
    if new_node.master_candidate:
5112
      self.LogInfo("Node will be a master candidate")
5113

    
5114
    if self.op.ndparams:
5115
      new_node.ndparams = self.op.ndparams
5116
    else:
5117
      new_node.ndparams = {}
5118

    
5119
    # check connectivity
5120
    result = self.rpc.call_version([node])[node]
5121
    result.Raise("Can't get version information from node %s" % node)
5122
    if constants.PROTOCOL_VERSION == result.payload:
5123
      logging.info("Communication to node %s fine, sw version %s match",
5124
                   node, result.payload)
5125
    else:
5126
      raise errors.OpExecError("Version mismatch master version %s,"
5127
                               " node version %s" %
5128
                               (constants.PROTOCOL_VERSION, result.payload))
5129

    
5130
    # Add node to our /etc/hosts, and add key to known_hosts
5131
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5132
      master_node = self.cfg.GetMasterNode()
5133
      result = self.rpc.call_etc_hosts_modify(master_node,
5134
                                              constants.ETC_HOSTS_ADD,
5135
                                              self.hostname.name,
5136
                                              self.hostname.ip)
5137
      result.Raise("Can't update hosts file with new host data")
5138

    
5139
    if new_node.secondary_ip != new_node.primary_ip:
5140
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5141
                               False)
5142

    
5143
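    # Ask the master node to verify ssh/hostname connectivity to the new node
    # before it is added to the configuration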
    node_verify_list = [self.cfg.GetMasterNode()]
5144
    node_verify_param = {
5145
      constants.NV_NODELIST: ([node], {}),
5146
      # TODO: do a node-net-test as well?
5147
    }
5148

    
5149
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5150
                                       self.cfg.GetClusterName())
5151
    for verifier in node_verify_list:
5152
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5153
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5154
      if nl_payload:
5155
        for failed in nl_payload:
5156
          feedback_fn("ssh/hostname verification failed"
5157
                      " (checking from %s): %s" %
5158
                      (verifier, nl_payload[failed]))
5159
        raise errors.OpExecError("ssh/hostname verification failed")
5160

    
5161
    if self.op.readd:
5162
      _RedistributeAncillaryFiles(self)
5163
      self.context.ReaddNode(new_node)
5164
      # make sure we redistribute the config
5165
      self.cfg.Update(new_node, feedback_fn)
5166
      # and make sure the new node will not have old files around
5167
      if not new_node.master_candidate:
5168
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5169
        msg = result.fail_msg
5170
        if msg:
5171
          self.LogWarning("Node failed to demote itself from master"
5172
                          " candidate status: %s" % msg)
5173
    else:
5174
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5175
                                  additional_vm=self.op.vm_capable)
5176
      self.context.AddNode(new_node, self.proc.GetECId())
5177

    
5178

    
5179
class LUNodeSetParams(LogicalUnit):
5180
  """Modifies the parameters of a node.
5181

5182
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5183
      to the node role (as _ROLE_*)
5184
  @cvar _R2F: a dictionary from node role to tuples of flags
5185
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5186

5187
  """
5188
  HPATH = "node-modify"
5189
  HTYPE = constants.HTYPE_NODE
5190
  REQ_BGL = False
5191
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5192
  _F2R = {
5193
    (True, False, False): _ROLE_CANDIDATE,
5194
    (False, True, False): _ROLE_DRAINED,
5195
    (False, False, True): _ROLE_OFFLINE,
5196
    (False, False, False): _ROLE_REGULAR,
5197
    }
5198
  _R2F = dict((v, k) for k, v in _F2R.items())
5199
  _FLAGS = ["master_candidate", "drained", "offline"]
5200

    
5201
  def CheckArguments(self):
5202
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5203
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5204
                self.op.master_capable, self.op.vm_capable,
5205
                self.op.secondary_ip, self.op.ndparams]
5206
    if all_mods.count(None) == len(all_mods):
5207
      raise errors.OpPrereqError("Please pass at least one modification",
5208
                                 errors.ECODE_INVAL)
5209
    if all_mods.count(True) > 1:
5210
      raise errors.OpPrereqError("Can't set the node into more than one"
5211
                                 " state at the same time",
5212
                                 errors.ECODE_INVAL)
5213

    
5214
    # Boolean value that tells us whether we might be demoting from MC
5215
    self.might_demote = (self.op.master_candidate == False or
5216
                         self.op.offline == True or
5217
                         self.op.drained == True or
5218
                         self.op.master_capable == False)
5219

    
5220
    if self.op.secondary_ip:
5221
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5222
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5223
                                   " address" % self.op.secondary_ip,
5224
                                   errors.ECODE_INVAL)
5225

    
5226
    self.lock_all = self.op.auto_promote and self.might_demote
5227
    self.lock_instances = self.op.secondary_ip is not None
5228

    
5229
  def ExpandNames(self):
5230
    if self.lock_all:
5231
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5232
    else:
5233
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5234

    
5235
    if self.lock_instances:
5236
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5237

    
5238
  def DeclareLocks(self, level):
5239
    # If we have locked all instances, before waiting to lock nodes, release
5240
    # all the ones living on nodes unrelated to the current operation.
5241
    if level == locking.LEVEL_NODE and self.lock_instances:
5242
      self.affected_instances = []
5243
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5244
        instances_keep = []
5245

    
5246
        # Build list of instances to release
5247
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5248
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5249
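          # Keep the locks of instances with internally mirrored disks that
          # use this node; all other instance locks can be released below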
          if (instance.disk_template in constants.DTS_INT_MIRROR and
5250
              self.op.node_name in instance.all_nodes):
5251
            instances_keep.append(instance_name)
5252
            self.affected_instances.append(instance)
5253

    
5254
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5255

    
5256
        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5257
                set(instances_keep))
5258

    
5259
  def BuildHooksEnv(self):
5260
    """Build hooks env.
5261

5262
    This runs on the master node.
5263

5264
    """
5265
    return {
5266
      "OP_TARGET": self.op.node_name,
5267
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5268
      "OFFLINE": str(self.op.offline),
5269
      "DRAINED": str(self.op.drained),
5270
      "MASTER_CAPABLE": str(self.op.master_capable),
5271
      "VM_CAPABLE": str(self.op.vm_capable),
5272
      }
5273

    
5274
  def BuildHooksNodes(self):
5275
    """Build hooks nodes.
5276

5277
    """
5278
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5279
    return (nl, nl)
5280

    
5281
  def CheckPrereq(self):
5282
    """Check prerequisites.
5283

5284
    This only checks the instance list against the existing names.
5285

5286
    """
5287
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5288

    
5289
    if (self.op.master_candidate is not None or
5290
        self.op.drained is not None or
5291
        self.op.offline is not None):
5292
      # we can't change the master's node flags
5293
      if self.op.node_name == self.cfg.GetMasterNode():
5294
        raise errors.OpPrereqError("The master role can be changed"
5295
                                   " only via master-failover",
5296
                                   errors.ECODE_INVAL)
5297

    
5298
    if self.op.master_candidate and not node.master_capable:
5299
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5300
                                 " it a master candidate" % node.name,
5301
                                 errors.ECODE_STATE)
5302

    
5303
    if self.op.vm_capable == False:
5304
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5305
      if ipri or isec:
5306
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5307
                                   " the vm_capable flag" % node.name,
5308
                                   errors.ECODE_STATE)
5309

    
5310
    if node.master_candidate and self.might_demote and not self.lock_all:
5311
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5312
      # check if after removing the current node, we're missing master
5313
      # candidates
5314
      (mc_remaining, mc_should, _) = \
5315
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5316
      if mc_remaining < mc_should:
5317
        raise errors.OpPrereqError("Not enough master candidates, please"
5318
                                   " pass auto promote option to allow"
5319
                                   " promotion", errors.ECODE_STATE)
5320

    
5321
    self.old_flags = old_flags = (node.master_candidate,
5322
                                  node.drained, node.offline)
5323
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5324
    self.old_role = old_role = self._F2R[old_flags]
5325

    
5326
    # Check for ineffective changes
5327
    for attr in self._FLAGS:
5328
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5329
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5330
        setattr(self.op, attr, None)
5331

    
5332
    # Past this point, any flag change to False means a transition
5333
    # away from the respective state, as only real changes are kept
5334

    
5335
    # TODO: We might query the real power state if it supports OOB
5336
    if _SupportsOob(self.cfg, node):
5337
      if self.op.offline is False and not (node.powered or
5338
                                           self.op.powered == True):
5339
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5340
                                    " offline status can be reset") %
5341
                                   self.op.node_name)
5342
    elif self.op.powered is not None:
5343
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5344
                                  " as it does not support out-of-band"
5345
                                  " handling") % self.op.node_name)
5346

    
5347
    # If we're being de-offlined or un-drained (or newly made master capable),
    # we'll promote ourselves to master candidate if needed
5348
    if (self.op.drained == False or self.op.offline == False or
5349
        (self.op.master_capable and not node.master_capable)):
5350
      if _DecideSelfPromotion(self):
5351
        self.op.master_candidate = True
5352
        self.LogInfo("Auto-promoting node to master candidate")
5353

    
5354
    # If we're no longer master capable, we'll demote ourselves from MC
5355
    if self.op.master_capable == False and node.master_candidate:
5356
      self.LogInfo("Demoting from master candidate")
5357
      self.op.master_candidate = False
5358

    
5359
    # Compute new role
5360
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5361
    if self.op.master_candidate:
5362
      new_role = self._ROLE_CANDIDATE
5363
    elif self.op.drained:
5364
      new_role = self._ROLE_DRAINED
5365
    elif self.op.offline:
5366
      new_role = self._ROLE_OFFLINE
5367
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5368
      # False is still in new flags, which means we're un-setting (the
5369
      # only) True flag
5370
      new_role = self._ROLE_REGULAR
5371
    else: # no new flags, nothing, keep old role
5372
      new_role = old_role
5373

    
5374
    self.new_role = new_role
5375

    
5376
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5377
      # Trying to transition out of offline status
5378
      # TODO: Use standard RPC runner, but make sure it works when the node is
5379
      # still marked offline
5380
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5381
      if result.fail_msg:
5382
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5383
                                   " to report its version: %s" %
5384
                                   (node.name, result.fail_msg),
5385
                                   errors.ECODE_STATE)
5386
      else:
5387
        self.LogWarning("Transitioning node from offline to online state"
5388
                        " without using re-add. Please make sure the node"
5389
                        " is healthy!")
5390

    
5391
    if self.op.secondary_ip:
5392
      # Ok even without locking, because this can't be changed by any LU
5393
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5394
      master_singlehomed = master.secondary_ip == master.primary_ip
5395
      if master_singlehomed and self.op.secondary_ip:
5396
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5397
                                   " homed cluster", errors.ECODE_INVAL)
5398

    
5399
      if node.offline:
5400
        if self.affected_instances:
5401
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5402
                                     " node has instances (%s) configured"
5403
                                     " to use it" % self.affected_instances)
5404
      else:
5405
        # On online nodes, check that no instances are running, and that
5406
        # the node has the new ip and we can reach it.
5407
        for instance in self.affected_instances:
5408
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5409

    
5410
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5411
        if master.name != node.name:
5412
          # check reachability from master secondary ip to new secondary ip
5413
          if not netutils.TcpPing(self.op.secondary_ip,
5414
                                  constants.DEFAULT_NODED_PORT,
5415
                                  source=master.secondary_ip):
5416
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5417
                                       " based ping to node daemon port",
5418
                                       errors.ECODE_ENVIRON)
5419

    
5420
    if self.op.ndparams:
5421
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5422
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5423
      self.new_ndparams = new_ndparams
5424

    
5425
  def Exec(self, feedback_fn):
5426
    """Modifies a node.
5427

5428
    """
5429
    node = self.node
5430
    old_role = self.old_role
5431
    new_role = self.new_role
5432

    
5433
    result = []
5434

    
5435
    if self.op.ndparams:
5436
      node.ndparams = self.new_ndparams
5437

    
5438
    if self.op.powered is not None:
5439
      node.powered = self.op.powered
5440

    
5441
    for attr in ["master_capable", "vm_capable"]:
5442
      val = getattr(self.op, attr)
5443
      if val is not None:
5444
        setattr(node, attr, val)
5445
        result.append((attr, str(val)))
5446

    
5447
    if new_role != old_role:
5448
      # Tell the node to demote itself, if no longer MC and not offline
5449
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5450
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5451
        if msg:
5452
          self.LogWarning("Node failed to demote itself: %s", msg)
5453

    
5454
      new_flags = self._R2F[new_role]
5455
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5456
        if of != nf:
5457
          result.append((desc, str(nf)))
5458
      (node.master_candidate, node.drained, node.offline) = new_flags
5459

    
5460
      # we locked all nodes, so we adjust the candidate pool before updating
      # this node
5461
      if self.lock_all:
5462
        _AdjustCandidatePool(self, [node.name])
5463

    
5464
    if self.op.secondary_ip:
5465
      node.secondary_ip = self.op.secondary_ip
5466
      result.append(("secondary_ip", self.op.secondary_ip))
5467

    
5468
    # this will trigger configuration file update, if needed
5469
    self.cfg.Update(node, feedback_fn)
5470

    
5471
    # this will trigger job queue propagation or cleanup if the mc
5472
    # flag changed
5473
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5474
      self.context.ReaddNode(node)
5475

    
5476
    return result
5477

    
5478

    
5479
class LUNodePowercycle(NoHooksLU):
5480
  """Powercycles a node.
5481

5482
  """
5483
  REQ_BGL = False
5484

    
5485
  def CheckArguments(self):
5486
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5487
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5488
      raise errors.OpPrereqError("The node is the master and the force"
5489
                                 " parameter was not set",
5490
                                 errors.ECODE_INVAL)
5491

    
5492
  def ExpandNames(self):
5493
    """Locking for PowercycleNode.
5494

5495
    This is a last-resort option and shouldn't block on other
5496
    jobs. Therefore, we grab no locks.
5497

5498
    """
5499
    self.needed_locks = {}
5500

    
5501
  def Exec(self, feedback_fn):
5502
    """Reboots a node.
5503

5504
    """
5505
    result = self.rpc.call_node_powercycle(self.op.node_name,
5506
                                           self.cfg.GetHypervisorType())
5507
    result.Raise("Failed to schedule the reboot")
5508
    return result.payload
5509

    
5510

    
5511
class LUClusterQuery(NoHooksLU):
5512
  """Query cluster configuration.
5513

5514
  """
5515
  REQ_BGL = False
5516

    
5517
  def ExpandNames(self):
5518
    self.needed_locks = {}
5519

    
5520
  def Exec(self, feedback_fn):
5521
    """Return cluster config.
5522

5523
    """
5524
    cluster = self.cfg.GetClusterInfo()
5525
    os_hvp = {}
5526

    
5527
    # Filter just for enabled hypervisors
5528
    for os_name, hv_dict in cluster.os_hvp.items():
5529
      os_hvp[os_name] = {}
5530
      for hv_name, hv_params in hv_dict.items():
5531
        if hv_name in cluster.enabled_hypervisors:
5532
          os_hvp[os_name][hv_name] = hv_params
5533

    
5534
    # Convert ip_family to ip_version
5535
    primary_ip_version = constants.IP4_VERSION
5536
    if cluster.primary_ip_family == netutils.IP6Address.family:
5537
      primary_ip_version = constants.IP6_VERSION
5538

    
5539
    result = {
5540
      "software_version": constants.RELEASE_VERSION,
5541
      "protocol_version": constants.PROTOCOL_VERSION,
5542
      "config_version": constants.CONFIG_VERSION,
5543
      "os_api_version": max(constants.OS_API_VERSIONS),
5544
      "export_version": constants.EXPORT_VERSION,
5545
      "architecture": (platform.architecture()[0], platform.machine()),
5546
      "name": cluster.cluster_name,
5547
      "master": cluster.master_node,
5548
      "default_hypervisor": cluster.enabled_hypervisors[0],
5549
      "enabled_hypervisors": cluster.enabled_hypervisors,
5550
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5551
                        for hypervisor_name in cluster.enabled_hypervisors]),
5552
      "os_hvp": os_hvp,
5553
      "beparams": cluster.beparams,
5554
      "osparams": cluster.osparams,
5555
      "nicparams": cluster.nicparams,
5556
      "ndparams": cluster.ndparams,
5557
      "candidate_pool_size": cluster.candidate_pool_size,
5558
      "master_netdev": cluster.master_netdev,
5559
      "master_netmask": cluster.master_netmask,
5560
      "volume_group_name": cluster.volume_group_name,
5561
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5562
      "file_storage_dir": cluster.file_storage_dir,
5563
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5564
      "maintain_node_health": cluster.maintain_node_health,
5565
      "ctime": cluster.ctime,
5566
      "mtime": cluster.mtime,
5567
      "uuid": cluster.uuid,
5568
      "tags": list(cluster.GetTags()),
5569
      "uid_pool": cluster.uid_pool,
5570
      "default_iallocator": cluster.default_iallocator,
5571
      "reserved_lvs": cluster.reserved_lvs,
5572
      "primary_ip_version": primary_ip_version,
5573
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5574
      "hidden_os": cluster.hidden_os,
5575
      "blacklisted_os": cluster.blacklisted_os,
5576
      }
5577

    
5578
    return result
5579

    
5580

    
5581
class LUClusterConfigQuery(NoHooksLU):
5582
  """Return configuration values.
5583

5584
  """
5585
  REQ_BGL = False
5586
  _FIELDS_DYNAMIC = utils.FieldSet()
5587
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5588
                                  "watcher_pause", "volume_group_name")
5589

    
5590
  def CheckArguments(self):
5591
    _CheckOutputFields(static=self._FIELDS_STATIC,
5592
                       dynamic=self._FIELDS_DYNAMIC,
5593
                       selected=self.op.output_fields)
5594

    
5595
  def ExpandNames(self):
5596
    self.needed_locks = {}
5597

    
5598
  def Exec(self, feedback_fn):
5599
    """Dump a representation of the cluster config to the standard output.
5600

5601
    """
5602
    values = []
5603
    for field in self.op.output_fields:
5604
      if field == "cluster_name":
5605
        entry = self.cfg.GetClusterName()
5606
      elif field == "master_node":
5607
        entry = self.cfg.GetMasterNode()
5608
      elif field == "drain_flag":
5609
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5610
      elif field == "watcher_pause":
5611
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5612
      elif field == "volume_group_name":
5613
        entry = self.cfg.GetVGName()
5614
      else:
5615
        raise errors.ParameterError(field)
5616
      values.append(entry)
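    # Note: the values are returned in the same order as the requested
    # output_fields, e.g. ["cluster_name", "volume_group_name"] yields
    # [<cluster name>, <volume group name>].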
5617
    return values
5618

    
5619

    
5620
class LUInstanceActivateDisks(NoHooksLU):
5621
  """Bring up an instance's disks.
5622

5623
  """
5624
  REQ_BGL = False
5625

    
5626
  def ExpandNames(self):
5627
    self._ExpandAndLockInstance()
5628
    self.needed_locks[locking.LEVEL_NODE] = []
5629
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5630

    
5631
  def DeclareLocks(self, level):
5632
    if level == locking.LEVEL_NODE:
5633
      self._LockInstancesNodes()
5634

    
5635
  def CheckPrereq(self):
5636
    """Check prerequisites.
5637

5638
    This checks that the instance is in the cluster.
5639

5640
    """
5641
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5642
    assert self.instance is not None, \
5643
      "Cannot retrieve locked instance %s" % self.op.instance_name
5644
    _CheckNodeOnline(self, self.instance.primary_node)
5645

    
5646
  def Exec(self, feedback_fn):
5647
    """Activate the disks.
5648

5649
    """
5650
    disks_ok, disks_info = \
5651
              _AssembleInstanceDisks(self, self.instance,
5652
                                     ignore_size=self.op.ignore_size)
5653
    if not disks_ok:
5654
      raise errors.OpExecError("Cannot activate block devices")
5655

    
5656
    return disks_info
5657

    
5658

    
5659
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5660
                           ignore_size=False):
5661
  """Prepare the block devices for an instance.
5662

5663
  This sets up the block devices on all nodes.
5664

5665
  @type lu: L{LogicalUnit}
5666
  @param lu: the logical unit on whose behalf we execute
5667
  @type instance: L{objects.Instance}
5668
  @param instance: the instance for whose disks we assemble
5669
  @type disks: list of L{objects.Disk} or None
5670
  @param disks: which disks to assemble (or all, if None)
5671
  @type ignore_secondaries: boolean
5672
  @param ignore_secondaries: if true, errors on secondary nodes
5673
      won't result in an error return from the function
5674
  @type ignore_size: boolean
5675
  @param ignore_size: if true, the current known size of the disk
5676
      will not be used during the disk activation, useful for cases
5677
      when the size is wrong
5678
  @return: a tuple of (status, device info); status is False if the
      operation failed, and device info is a list of
      (host, instance_visible_name, node_visible_name) tuples with the
      mapping from node devices to instance devices
5681

5682
  """
5683
  device_info = []
5684
  disks_ok = True
5685
  iname = instance.name
5686
  disks = _ExpandCheckDisks(instance, disks)
5687

    
5688
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking has occurred, but we do not eliminate it
5691

    
5692
  # The proper fix would be to wait (with some limits) until the
5693
  # connection has been made and drbd transitions from WFConnection
5694
  # into any other network-connected state (Connected, SyncTarget,
5695
  # SyncSource, etc.)
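  # The two passes below differ mainly in the role flag given to
  # call_blockdev_assemble: False (secondary role) for every node in the
  # disk's node tree in the first pass, True (primary role) for the primary
  # node only in the second pass, which also records the resulting device
  # path.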
5696

    
5697
  # 1st pass, assemble on all nodes in secondary mode
5698
  for idx, inst_disk in enumerate(disks):
5699
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5700
      if ignore_size:
5701
        node_disk = node_disk.Copy()
5702
        node_disk.UnsetSize()
5703
      lu.cfg.SetDiskID(node_disk, node)
5704
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5705
      msg = result.fail_msg
5706
      if msg:
5707
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5708
                           " (is_primary=False, pass=1): %s",
5709
                           inst_disk.iv_name, node, msg)
5710
        if not ignore_secondaries:
5711
          disks_ok = False
5712

    
5713
  # FIXME: race condition on drbd migration to primary
5714

    
5715
  # 2nd pass, do only the primary node
5716
  for idx, inst_disk in enumerate(disks):
5717
    dev_path = None
5718

    
5719
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5720
      if node != instance.primary_node:
5721
        continue
5722
      if ignore_size:
5723
        node_disk = node_disk.Copy()
5724
        node_disk.UnsetSize()
5725
      lu.cfg.SetDiskID(node_disk, node)
5726
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5727
      msg = result.fail_msg
5728
      if msg:
5729
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5730
                           " (is_primary=True, pass=2): %s",
5731
                           inst_disk.iv_name, node, msg)
5732
        disks_ok = False
5733
      else:
5734
        dev_path = result.payload
5735

    
5736
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5737

    
5738
  # leave the disks configured for the primary node
5739
  # this is a workaround that would be fixed better by
5740
  # improving the logical/physical id handling
5741
  for disk in disks:
5742
    lu.cfg.SetDiskID(disk, instance.primary_node)
5743

    
5744
  return disks_ok, device_info
5745

    
5746

    
5747
def _StartInstanceDisks(lu, instance, force):
5748
  """Start the disks of an instance.
5749

5750
  """
5751
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5752
                                           ignore_secondaries=force)
5753
  if not disks_ok:
5754
    _ShutdownInstanceDisks(lu, instance)
5755
    if force is not None and not force:
5756
      lu.proc.LogWarning("", hint="If the message above refers to a"
5757
                         " secondary node,"
5758
                         " you can retry the operation using '--force'.")
5759
    raise errors.OpExecError("Disk consistency error")
5760

    
5761

    
5762
class LUInstanceDeactivateDisks(NoHooksLU):
5763
  """Shutdown an instance's disks.
5764

5765
  """
5766
  REQ_BGL = False
5767

    
5768
  def ExpandNames(self):
5769
    self._ExpandAndLockInstance()
5770
    self.needed_locks[locking.LEVEL_NODE] = []
5771
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5772

    
5773
  def DeclareLocks(self, level):
5774
    if level == locking.LEVEL_NODE:
5775
      self._LockInstancesNodes()
5776

    
5777
  def CheckPrereq(self):
5778
    """Check prerequisites.
5779

5780
    This checks that the instance is in the cluster.
5781

5782
    """
5783
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5784
    assert self.instance is not None, \
5785
      "Cannot retrieve locked instance %s" % self.op.instance_name
5786

    
5787
  def Exec(self, feedback_fn):
5788
    """Deactivate the disks
5789

5790
    """
5791
    instance = self.instance
5792
    if self.op.force:
5793
      _ShutdownInstanceDisks(self, instance)
5794
    else:
5795
      _SafeShutdownInstanceDisks(self, instance)
5796

    
5797

    
5798
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5799
  """Shutdown block devices of an instance.
5800

5801
  This function checks if an instance is running, before calling
5802
  _ShutdownInstanceDisks.
5803

5804
  """
5805
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5806
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5807

    
5808

    
5809
def _ExpandCheckDisks(instance, disks):
5810
  """Return the instance disks selected by the disks list
5811

5812
  @type disks: list of L{objects.Disk} or None
5813
  @param disks: selected disks
5814
  @rtype: list of L{objects.Disk}
5815
  @return: selected instance disks to act on
5816

5817
  """
5818
  if disks is None:
5819
    return instance.disks
5820
  else:
5821
    if not set(disks).issubset(instance.disks):
5822
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5823
                                   " target instance")
5824
    return disks
5825

    
5826

    
5827
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5828
  """Shutdown block devices of an instance.
5829

5830
  This does the shutdown on all nodes of the instance.
5831

5832
  If ignore_primary is true, errors on the primary node are ignored;
  otherwise such errors cause the shutdown to be reported as failed.
5834

5835
  """
5836
  all_result = True
5837
  disks = _ExpandCheckDisks(instance, disks)
5838

    
5839
  for disk in disks:
5840
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5841
      lu.cfg.SetDiskID(top_disk, node)
5842
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5843
      msg = result.fail_msg
5844
      if msg:
5845
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5846
                      disk.iv_name, node, msg)
5847
        if ((node == instance.primary_node and not ignore_primary) or
5848
            (node != instance.primary_node and not result.offline)):
5849
          all_result = False
5850
  return all_result
5851

    
5852

    
5853
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5854
  """Checks if a node has enough free memory.
5855

5856
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5860

5861
  @type lu: C{LogicalUnit}
5862
  @param lu: a logical unit from which we get configuration data
5863
  @type node: C{str}
5864
  @param node: the node to check
5865
  @type reason: C{str}
5866
  @param reason: string to use in the error message
5867
  @type requested: C{int}
5868
  @param requested: the amount of memory in MiB to check for
5869
  @type hypervisor_name: C{str}
5870
  @param hypervisor_name: the hypervisor to ask for memory stats
5871
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5872
      we cannot check the node
5873

5874
  """
5875
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5876
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5877
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5878
  free_mem = nodeinfo[node].payload.get("memory_free", None)
5879
  if not isinstance(free_mem, int):
5880
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5881
                               " was '%s'" % (node, free_mem),
5882
                               errors.ECODE_ENVIRON)
5883
  if requested > free_mem:
5884
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5885
                               " needed %s MiB, available %s MiB" %
5886
                               (node, reason, requested, free_mem),
5887
                               errors.ECODE_NORES)
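# Usage sketch (illustrative; it mirrors the call made in
# LUInstanceStartup.CheckPrereq further below):
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)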


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5891
  """Checks if nodes have enough free disk space in the all VGs.
5892

5893
  This function check if all given nodes have the needed amount of
5894
  free disk. In case any node has less disk or we cannot get the
5895
  information from the node, this function raise an OpPrereqError
5896
  exception.
5897

5898
  @type lu: C{LogicalUnit}
5899
  @param lu: a logical unit from which we get configuration data
5900
  @type nodenames: C{list}
5901
  @param nodenames: the list of node names to check
5902
  @type req_sizes: C{dict}
5903
  @param req_sizes: the hash of vg and corresponding amount of disk in
5904
      MiB to check for
5905
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5906
      or we cannot check the node
5907

5908
  """
5909
  for vg, req_size in req_sizes.items():
5910
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5911

    
5912

    
5913
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5914
  """Checks if nodes have enough free disk space in the specified VG.
5915

5916
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5920

5921
  @type lu: C{LogicalUnit}
5922
  @param lu: a logical unit from which we get configuration data
5923
  @type nodenames: C{list}
5924
  @param nodenames: the list of node names to check
5925
  @type vg: C{str}
5926
  @param vg: the volume group to check
5927
  @type requested: C{int}
5928
  @param requested: the amount of disk in MiB to check for
5929
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5930
      or we cannot check the node
5931

5932
  """
5933
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5934
  for node in nodenames:
5935
    info = nodeinfo[node]
5936
    info.Raise("Cannot get current information from node %s" % node,
5937
               prereq=True, ecode=errors.ECODE_ENVIRON)
5938
    vg_free = info.payload.get("vg_free", None)
5939
    if not isinstance(vg_free, int):
5940
      raise errors.OpPrereqError("Can't compute free disk space on node"
5941
                                 " %s for vg %s, result was '%s'" %
5942
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
5943
    if requested > vg_free:
5944
      raise errors.OpPrereqError("Not enough disk space on target node %s"
5945
                                 " vg %s: required %d MiB, available %d MiB" %
5946
                                 (node, vg, requested, vg_free),
5947
                                 errors.ECODE_NORES)
5948

    
5949

    
5950
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
5951
  """Checks if nodes have enough physical CPUs
5952

5953
  This function checks if all given nodes have the needed number of
5954
  physical CPUs. In case any node has less CPUs or we cannot get the
5955
  information from the node, this function raises an OpPrereqError
5956
  exception.
5957

5958
  @type lu: C{LogicalUnit}
5959
  @param lu: a logical unit from which we get configuration data
5960
  @type nodenames: C{list}
5961
  @param nodenames: the list of node names to check
5962
  @type requested: C{int}
5963
  @param requested: the minimum acceptable number of physical CPUs
5964
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
5965
      or we cannot check the node
5966

5967
  """
5968
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
5969
  for node in nodenames:
5970
    info = nodeinfo[node]
5971
    info.Raise("Cannot get current information from node %s" % node,
5972
               prereq=True, ecode=errors.ECODE_ENVIRON)
5973
    num_cpus = info.payload.get("cpu_total", None)
5974
    if not isinstance(num_cpus, int):
5975
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
5976
                                 " on node %s, result was '%s'" %
5977
                                 (node, num_cpus), errors.ECODE_ENVIRON)
5978
    if requested > num_cpus:
5979
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
5980
                                 "required" % (node, num_cpus, requested),
5981
                                 errors.ECODE_NORES)
5982

    
5983

    
5984
class LUInstanceStartup(LogicalUnit):
5985
  """Starts an instance.
5986

5987
  """
5988
  HPATH = "instance-start"
5989
  HTYPE = constants.HTYPE_INSTANCE
5990
  REQ_BGL = False
5991

    
5992
  def CheckArguments(self):
5993
    # extra beparams
5994
    if self.op.beparams:
5995
      # fill the beparams dict
5996
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5997

    
5998
  def ExpandNames(self):
5999
    self._ExpandAndLockInstance()
6000

    
6001
  def BuildHooksEnv(self):
6002
    """Build hooks env.
6003

6004
    This runs on master, primary and secondary nodes of the instance.
6005

6006
    """
6007
    env = {
6008
      "FORCE": self.op.force,
6009
      }
6010

    
6011
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6012

    
6013
    return env
6014

    
6015
  def BuildHooksNodes(self):
6016
    """Build hooks nodes.
6017

6018
    """
6019
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6020
    return (nl, nl)
6021

    
6022
  def CheckPrereq(self):
6023
    """Check prerequisites.
6024

6025
    This checks that the instance is in the cluster.
6026

6027
    """
6028
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6029
    assert self.instance is not None, \
6030
      "Cannot retrieve locked instance %s" % self.op.instance_name
6031

    
6032
    # extra hvparams
6033
    if self.op.hvparams:
6034
      # check hypervisor parameter syntax (locally)
6035
      cluster = self.cfg.GetClusterInfo()
6036
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6037
      filled_hvp = cluster.FillHV(instance)
6038
      filled_hvp.update(self.op.hvparams)
6039
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6040
      hv_type.CheckParameterSyntax(filled_hvp)
6041
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
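      # Note: the merged filled_hvp dict is only used for the syntax and
      # parameter checks above; Exec below hands the raw self.op.hvparams
      # override (together with self.op.beparams) to the node.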
6042

    
6043
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6044

    
6045
    if self.primary_offline and self.op.ignore_offline_nodes:
6046
      self.proc.LogWarning("Ignoring offline primary node")
6047

    
6048
      if self.op.hvparams or self.op.beparams:
6049
        self.proc.LogWarning("Overridden parameters are ignored")
6050
    else:
6051
      _CheckNodeOnline(self, instance.primary_node)
6052

    
6053
      bep = self.cfg.GetClusterInfo().FillBE(instance)
6054

    
6055
      # check bridges existence
6056
      _CheckInstanceBridgesExist(self, instance)
6057

    
6058
      remote_info = self.rpc.call_instance_info(instance.primary_node,
6059
                                                instance.name,
6060
                                                instance.hypervisor)
6061
      remote_info.Raise("Error checking node %s" % instance.primary_node,
6062
                        prereq=True, ecode=errors.ECODE_ENVIRON)
6063
      if not remote_info.payload: # not running already
6064
        _CheckNodeFreeMemory(self, instance.primary_node,
6065
                             "starting instance %s" % instance.name,
6066
                             bep[constants.BE_MEMORY], instance.hypervisor)
6067

    
6068
  def Exec(self, feedback_fn):
6069
    """Start the instance.
6070

6071
    """
6072
    instance = self.instance
6073
    force = self.op.force
6074

    
6075
    if not self.op.no_remember:
6076
      self.cfg.MarkInstanceUp(instance.name)
6077

    
6078
    if self.primary_offline:
6079
      assert self.op.ignore_offline_nodes
6080
      self.proc.LogInfo("Primary node offline, marked instance as started")
6081
    else:
6082
      node_current = instance.primary_node
6083

    
6084
      _StartInstanceDisks(self, instance, force)
6085

    
6086
      result = \
6087
        self.rpc.call_instance_start(node_current,
6088
                                     (instance, self.op.hvparams,
6089
                                      self.op.beparams),
6090
                                     self.op.startup_paused)
6091
      msg = result.fail_msg
6092
      if msg:
6093
        _ShutdownInstanceDisks(self, instance)
6094
        raise errors.OpExecError("Could not start instance: %s" % msg)
6095

    
6096

    
6097
class LUInstanceReboot(LogicalUnit):
6098
  """Reboot an instance.
6099

6100
  """
6101
  HPATH = "instance-reboot"
6102
  HTYPE = constants.HTYPE_INSTANCE
6103
  REQ_BGL = False
6104

    
6105
  def ExpandNames(self):
6106
    self._ExpandAndLockInstance()
6107

    
6108
  def BuildHooksEnv(self):
6109
    """Build hooks env.
6110

6111
    This runs on master, primary and secondary nodes of the instance.
6112

6113
    """
6114
    env = {
6115
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6116
      "REBOOT_TYPE": self.op.reboot_type,
6117
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6118
      }
6119

    
6120
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6121

    
6122
    return env
6123

    
6124
  def BuildHooksNodes(self):
6125
    """Build hooks nodes.
6126

6127
    """
6128
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6129
    return (nl, nl)
6130

    
6131
  def CheckPrereq(self):
6132
    """Check prerequisites.
6133

6134
    This checks that the instance is in the cluster.
6135

6136
    """
6137
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6138
    assert self.instance is not None, \
6139
      "Cannot retrieve locked instance %s" % self.op.instance_name
6140

    
6141
    _CheckNodeOnline(self, instance.primary_node)
6142

    
6143
    # check bridges existence
6144
    _CheckInstanceBridgesExist(self, instance)
6145

    
6146
  def Exec(self, feedback_fn):
6147
    """Reboot the instance.
6148

6149
    """
6150
    instance = self.instance
6151
    ignore_secondaries = self.op.ignore_secondaries
6152
    reboot_type = self.op.reboot_type
6153

    
6154
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6155
                                              instance.name,
6156
                                              instance.hypervisor)
6157
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6158
    instance_running = bool(remote_info.payload)
6159

    
6160
    node_current = instance.primary_node
6161

    
6162
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6163
                                            constants.INSTANCE_REBOOT_HARD]:
6164
      for disk in instance.disks:
6165
        self.cfg.SetDiskID(disk, node_current)
6166
      result = self.rpc.call_instance_reboot(node_current, instance,
6167
                                             reboot_type,
6168
                                             self.op.shutdown_timeout)
6169
      result.Raise("Could not reboot instance")
6170
    else:
6171
      if instance_running:
6172
        result = self.rpc.call_instance_shutdown(node_current, instance,
6173
                                                 self.op.shutdown_timeout)
6174
        result.Raise("Could not shutdown instance for full reboot")
6175
        _ShutdownInstanceDisks(self, instance)
6176
      else:
6177
        self.LogInfo("Instance %s was already stopped, starting now",
6178
                     instance.name)
6179
      _StartInstanceDisks(self, instance, ignore_secondaries)
6180
      result = self.rpc.call_instance_start(node_current,
6181
                                            (instance, None, None), False)
6182
      msg = result.fail_msg
6183
      if msg:
6184
        _ShutdownInstanceDisks(self, instance)
6185
        raise errors.OpExecError("Could not start instance for"
6186
                                 " full reboot: %s" % msg)
6187

    
6188
    self.cfg.MarkInstanceUp(instance.name)
6189

    
6190

    
6191
class LUInstanceShutdown(LogicalUnit):
6192
  """Shutdown an instance.
6193

6194
  """
6195
  HPATH = "instance-stop"
6196
  HTYPE = constants.HTYPE_INSTANCE
6197
  REQ_BGL = False
6198

    
6199
  def ExpandNames(self):
6200
    self._ExpandAndLockInstance()
6201

    
6202
  def BuildHooksEnv(self):
6203
    """Build hooks env.
6204

6205
    This runs on master, primary and secondary nodes of the instance.
6206

6207
    """
6208
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6209
    env["TIMEOUT"] = self.op.timeout
6210
    return env
6211

    
6212
  def BuildHooksNodes(self):
6213
    """Build hooks nodes.
6214

6215
    """
6216
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6217
    return (nl, nl)
6218

    
6219
  def CheckPrereq(self):
6220
    """Check prerequisites.
6221

6222
    This checks that the instance is in the cluster.
6223

6224
    """
6225
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6226
    assert self.instance is not None, \
6227
      "Cannot retrieve locked instance %s" % self.op.instance_name
6228

    
6229
    self.primary_offline = \
6230
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6231

    
6232
    if self.primary_offline and self.op.ignore_offline_nodes:
6233
      self.proc.LogWarning("Ignoring offline primary node")
6234
    else:
6235
      _CheckNodeOnline(self, self.instance.primary_node)
6236

    
6237
  def Exec(self, feedback_fn):
6238
    """Shutdown the instance.
6239

6240
    """
6241
    instance = self.instance
6242
    node_current = instance.primary_node
6243
    timeout = self.op.timeout
6244

    
6245
    if not self.op.no_remember:
6246
      self.cfg.MarkInstanceDown(instance.name)
6247

    
6248
    if self.primary_offline:
6249
      assert self.op.ignore_offline_nodes
6250
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6251
    else:
6252
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6253
      msg = result.fail_msg
6254
      if msg:
6255
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6256

    
6257
      _ShutdownInstanceDisks(self, instance)
6258

    
6259

    
6260
class LUInstanceReinstall(LogicalUnit):
6261
  """Reinstall an instance.
6262

6263
  """
6264
  HPATH = "instance-reinstall"
6265
  HTYPE = constants.HTYPE_INSTANCE
6266
  REQ_BGL = False
6267

    
6268
  def ExpandNames(self):
6269
    self._ExpandAndLockInstance()
6270

    
6271
  def BuildHooksEnv(self):
6272
    """Build hooks env.
6273

6274
    This runs on master, primary and secondary nodes of the instance.
6275

6276
    """
6277
    return _BuildInstanceHookEnvByObject(self, self.instance)
6278

    
6279
  def BuildHooksNodes(self):
6280
    """Build hooks nodes.
6281

6282
    """
6283
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6284
    return (nl, nl)
6285

    
6286
  def CheckPrereq(self):
6287
    """Check prerequisites.
6288

6289
    This checks that the instance is in the cluster and is not running.
6290

6291
    """
6292
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6293
    assert instance is not None, \
6294
      "Cannot retrieve locked instance %s" % self.op.instance_name
6295
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6296
                     " offline, cannot reinstall")
6297
    for node in instance.secondary_nodes:
6298
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6299
                       " cannot reinstall")
6300

    
6301
    if instance.disk_template == constants.DT_DISKLESS:
6302
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6303
                                 self.op.instance_name,
6304
                                 errors.ECODE_INVAL)
6305
    _CheckInstanceDown(self, instance, "cannot reinstall")
6306

    
6307
    if self.op.os_type is not None:
6308
      # OS verification
6309
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6310
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6311
      instance_os = self.op.os_type
6312
    else:
6313
      instance_os = instance.os
6314

    
6315
    nodelist = list(instance.all_nodes)
6316

    
6317
    if self.op.osparams:
6318
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6319
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6320
      self.os_inst = i_osdict # the new dict (without defaults)
6321
    else:
6322
      self.os_inst = None
6323

    
6324
    self.instance = instance
6325

    
6326
  def Exec(self, feedback_fn):
6327
    """Reinstall the instance.
6328

6329
    """
6330
    inst = self.instance
6331

    
6332
    if self.op.os_type is not None:
6333
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6334
      inst.os = self.op.os_type
6335
      # Write to configuration
6336
      self.cfg.Update(inst, feedback_fn)
6337

    
6338
    _StartInstanceDisks(self, inst, None)
6339
    try:
6340
      feedback_fn("Running the instance OS create scripts...")
6341
      # FIXME: pass debug option from opcode to backend
6342
      result = self.rpc.call_instance_os_add(inst.primary_node,
6343
                                             (inst, self.os_inst), True,
6344
                                             self.op.debug_level)
6345
      result.Raise("Could not install OS for instance %s on node %s" %
6346
                   (inst.name, inst.primary_node))
6347
    finally:
6348
      _ShutdownInstanceDisks(self, inst)
6349

    
6350

    
6351
class LUInstanceRecreateDisks(LogicalUnit):
6352
  """Recreate an instance's missing disks.
6353

6354
  """
6355
  HPATH = "instance-recreate-disks"
6356
  HTYPE = constants.HTYPE_INSTANCE
6357
  REQ_BGL = False
6358

    
6359
  def CheckArguments(self):
6360
    # normalise the disk list
6361
    self.op.disks = sorted(frozenset(self.op.disks))
6362

    
6363
  def ExpandNames(self):
6364
    self._ExpandAndLockInstance()
6365
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6366
    if self.op.nodes:
6367
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6368
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6369
    else:
6370
      self.needed_locks[locking.LEVEL_NODE] = []
6371

    
6372
  def DeclareLocks(self, level):
6373
    if level == locking.LEVEL_NODE:
6374
      # if we replace the nodes, we only need to lock the old primary,
6375
      # otherwise we need to lock all nodes for disk re-creation
6376
      primary_only = bool(self.op.nodes)
6377
      self._LockInstancesNodes(primary_only=primary_only)
6378

    
6379
  def BuildHooksEnv(self):
6380
    """Build hooks env.
6381

6382
    This runs on master, primary and secondary nodes of the instance.
6383

6384
    """
6385
    return _BuildInstanceHookEnvByObject(self, self.instance)
6386

    
6387
  def BuildHooksNodes(self):
6388
    """Build hooks nodes.
6389

6390
    """
6391
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6392
    return (nl, nl)
6393

    
6394
  def CheckPrereq(self):
6395
    """Check prerequisites.
6396

6397
    This checks that the instance is in the cluster and is not running.
6398

6399
    """
6400
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6401
    assert instance is not None, \
6402
      "Cannot retrieve locked instance %s" % self.op.instance_name
6403
    if self.op.nodes:
6404
      if len(self.op.nodes) != len(instance.all_nodes):
6405
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6406
                                   " %d replacement nodes were specified" %
6407
                                   (instance.name, len(instance.all_nodes),
6408
                                    len(self.op.nodes)),
6409
                                   errors.ECODE_INVAL)
6410
      assert instance.disk_template != constants.DT_DRBD8 or \
6411
          len(self.op.nodes) == 2
6412
      assert instance.disk_template != constants.DT_PLAIN or \
6413
          len(self.op.nodes) == 1
6414
      primary_node = self.op.nodes[0]
6415
    else:
6416
      primary_node = instance.primary_node
6417
    _CheckNodeOnline(self, primary_node)
6418

    
6419
    if instance.disk_template == constants.DT_DISKLESS:
6420
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6421
                                 self.op.instance_name, errors.ECODE_INVAL)
6422
    # if we replace nodes *and* the old primary is offline, we don't
6423
    # check
6424
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6425
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6426
    if not (self.op.nodes and old_pnode.offline):
6427
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6428

    
6429
    if not self.op.disks:
6430
      self.op.disks = range(len(instance.disks))
6431
    else:
6432
      for idx in self.op.disks:
6433
        if idx >= len(instance.disks):
6434
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6435
                                     errors.ECODE_INVAL)
6436
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6437
      raise errors.OpPrereqError("Can't recreate disks partially and"
6438
                                 " change the nodes at the same time",
6439
                                 errors.ECODE_INVAL)
6440
    self.instance = instance
6441

    
6442
  def Exec(self, feedback_fn):
6443
    """Recreate the disks.
6444

6445
    """
6446
    instance = self.instance
6447

    
6448
    to_skip = []
6449
    mods = [] # keeps track of needed logical_id changes
6450

    
6451
    for idx, disk in enumerate(instance.disks):
6452
      if idx not in self.op.disks: # disk idx has not been passed in
6453
        to_skip.append(idx)
6454
        continue
6455
      # update secondaries for disks, if needed
6456
      if self.op.nodes:
6457
        if disk.dev_type == constants.LD_DRBD8:
6458
          # need to update the nodes and minors
6459
          assert len(self.op.nodes) == 2
6460
          assert len(disk.logical_id) == 6 # otherwise disk internals
6461
                                           # have changed
6462
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6463
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6464
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6465
                    new_minors[0], new_minors[1], old_secret)
6466
          assert len(disk.logical_id) == len(new_id)
6467
          mods.append((idx, new_id))
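          # Layout of the DRBD8 logical_id unpacked above (inferred from the
          # tuple positions used here):
          #   (node_a, node_b, port, minor_a, minor_b, secret)
          # Only the node names and minors change; port and secret are kept.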
6468

    
6469
    # now that we have passed all asserts above, we can apply the mods
6470
    # in a single run (to avoid partial changes)
6471
    for idx, new_id in mods:
6472
      instance.disks[idx].logical_id = new_id
6473

    
6474
    # change primary node, if needed
6475
    if self.op.nodes:
6476
      instance.primary_node = self.op.nodes[0]
6477
      self.LogWarning("Changing the instance's nodes, you will have to"
6478
                      " remove any disks left on the older nodes manually")
6479

    
6480
    if self.op.nodes:
6481
      self.cfg.Update(instance, feedback_fn)
6482

    
6483
    _CreateDisks(self, instance, to_skip=to_skip)
6484

    
6485

    
6486
class LUInstanceRename(LogicalUnit):
6487
  """Rename an instance.
6488

6489
  """
6490
  HPATH = "instance-rename"
6491
  HTYPE = constants.HTYPE_INSTANCE
6492

    
6493
  def CheckArguments(self):
6494
    """Check arguments.
6495

6496
    """
6497
    if self.op.ip_check and not self.op.name_check:
6498
      # TODO: make the ip check more flexible and not depend on the name check
6499
      raise errors.OpPrereqError("IP address check requires a name check",
6500
                                 errors.ECODE_INVAL)
6501

    
6502
  def BuildHooksEnv(self):
6503
    """Build hooks env.
6504

6505
    This runs on master, primary and secondary nodes of the instance.
6506

6507
    """
6508
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6509
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6510
    return env
6511

    
6512
  def BuildHooksNodes(self):
6513
    """Build hooks nodes.
6514

6515
    """
6516
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6517
    return (nl, nl)
6518

    
6519
  def CheckPrereq(self):
6520
    """Check prerequisites.
6521

6522
    This checks that the instance is in the cluster and is not running.
6523

6524
    """
6525
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6526
                                                self.op.instance_name)
6527
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6528
    assert instance is not None
6529
    _CheckNodeOnline(self, instance.primary_node)
6530
    _CheckInstanceDown(self, instance, "cannot rename")
6531
    self.instance = instance
6532

    
6533
    new_name = self.op.new_name
6534
    if self.op.name_check:
6535
      hostname = netutils.GetHostname(name=new_name)
6536
      if hostname != new_name:
6537
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6538
                     hostname.name)
6539
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6540
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6541
                                    " same as given hostname '%s'") %
6542
                                    (hostname.name, self.op.new_name),
6543
                                    errors.ECODE_INVAL)
6544
      new_name = self.op.new_name = hostname.name
6545
      if (self.op.ip_check and
6546
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6547
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6548
                                   (hostname.ip, new_name),
6549
                                   errors.ECODE_NOTUNIQUE)
6550

    
6551
    instance_list = self.cfg.GetInstanceList()
6552
    if new_name in instance_list and new_name != instance.name:
6553
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6554
                                 new_name, errors.ECODE_EXISTS)
6555

    
6556
  def Exec(self, feedback_fn):
6557
    """Rename the instance.
6558

6559
    """
6560
    inst = self.instance
6561
    old_name = inst.name
6562

    
6563
    rename_file_storage = False
6564
    if (inst.disk_template in constants.DTS_FILEBASED and
6565
        self.op.new_name != inst.name):
6566
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6567
      rename_file_storage = True
6568

    
6569
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6570
    # Change the instance lock. This is definitely safe while we hold the BGL.
6571
    # Otherwise the new lock would have to be added in acquired mode.
6572
    assert self.REQ_BGL
6573
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6574
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6575

    
6576
    # re-read the instance from the configuration after rename
6577
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6578

    
6579
    if rename_file_storage:
6580
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6581
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6582
                                                     old_file_storage_dir,
6583
                                                     new_file_storage_dir)
6584
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6585
                   " (but the instance has been renamed in Ganeti)" %
6586
                   (inst.primary_node, old_file_storage_dir,
6587
                    new_file_storage_dir))
6588

    
6589
    _StartInstanceDisks(self, inst, None)
6590
    try:
6591
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6592
                                                 old_name, self.op.debug_level)
6593
      msg = result.fail_msg
6594
      if msg:
6595
        msg = ("Could not run OS rename script for instance %s on node %s"
6596
               " (but the instance has been renamed in Ganeti): %s" %
6597
               (inst.name, inst.primary_node, msg))
6598
        self.proc.LogWarning(msg)
6599
    finally:
6600
      _ShutdownInstanceDisks(self, inst)
6601

    
6602
    return inst.name
6603

    
6604

    
6605
class LUInstanceRemove(LogicalUnit):
6606
  """Remove an instance.
6607

6608
  """
6609
  HPATH = "instance-remove"
6610
  HTYPE = constants.HTYPE_INSTANCE
6611
  REQ_BGL = False
6612

    
6613
  def ExpandNames(self):
6614
    self._ExpandAndLockInstance()
6615
    self.needed_locks[locking.LEVEL_NODE] = []
6616
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6617

    
6618
  def DeclareLocks(self, level):
6619
    if level == locking.LEVEL_NODE:
6620
      self._LockInstancesNodes()
6621

    
6622
  def BuildHooksEnv(self):
6623
    """Build hooks env.
6624

6625
    This runs on master, primary and secondary nodes of the instance.
6626

6627
    """
6628
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6629
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6630
    return env
6631

    
6632
  def BuildHooksNodes(self):
6633
    """Build hooks nodes.
6634

6635
    """
6636
    nl = [self.cfg.GetMasterNode()]
6637
    nl_post = list(self.instance.all_nodes) + nl
6638
    return (nl, nl_post)
6639

    
6640
  def CheckPrereq(self):
6641
    """Check prerequisites.
6642

6643
    This checks that the instance is in the cluster.
6644

6645
    """
6646
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6647
    assert self.instance is not None, \
6648
      "Cannot retrieve locked instance %s" % self.op.instance_name
6649

    
6650
  def Exec(self, feedback_fn):
6651
    """Remove the instance.
6652

6653
    """
6654
    instance = self.instance
6655
    logging.info("Shutting down instance %s on node %s",
6656
                 instance.name, instance.primary_node)
6657

    
6658
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6659
                                             self.op.shutdown_timeout)
6660
    msg = result.fail_msg
6661
    if msg:
6662
      if self.op.ignore_failures:
6663
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6664
      else:
6665
        raise errors.OpExecError("Could not shutdown instance %s on"
6666
                                 " node %s: %s" %
6667
                                 (instance.name, instance.primary_node, msg))
6668

    
6669
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6670

    
6671

    
6672
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6673
  """Utility function to remove an instance.
6674

6675
  """
6676
  logging.info("Removing block devices for instance %s", instance.name)
6677

    
6678
  if not _RemoveDisks(lu, instance):
6679
    if not ignore_failures:
6680
      raise errors.OpExecError("Can't remove instance's disks")
6681
    feedback_fn("Warning: can't remove instance's disks")
6682

    
6683
  logging.info("Removing instance %s out of cluster config", instance.name)
6684

    
6685
  lu.cfg.RemoveInstance(instance.name)
6686

    
6687
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6688
    "Instance lock removal conflict"
6689

    
6690
  # Remove lock for the instance
6691
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6692

    
6693

    
6694
class LUInstanceQuery(NoHooksLU):
6695
  """Logical unit for querying instances.
6696

6697
  """
6698
  # pylint: disable=W0142
6699
  REQ_BGL = False
6700

    
6701
  def CheckArguments(self):
6702
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6703
                             self.op.output_fields, self.op.use_locking)
6704

    
6705
  def ExpandNames(self):
6706
    self.iq.ExpandNames(self)
6707

    
6708
  def DeclareLocks(self, level):
6709
    self.iq.DeclareLocks(self, level)
6710

    
6711
  def Exec(self, feedback_fn):
6712
    return self.iq.OldStyleQuery(self)
6713

    
6714

    
6715
class LUInstanceFailover(LogicalUnit):
6716
  """Failover an instance.
6717

6718
  """
6719
  HPATH = "instance-failover"
6720
  HTYPE = constants.HTYPE_INSTANCE
6721
  REQ_BGL = False
6722

    
6723
  def CheckArguments(self):
6724
    """Check the arguments.
6725

6726
    """
6727
    self.iallocator = getattr(self.op, "iallocator", None)
6728
    self.target_node = getattr(self.op, "target_node", None)
6729

    
6730
  def ExpandNames(self):
6731
    self._ExpandAndLockInstance()
6732

    
6733
    if self.op.target_node is not None:
6734
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6735

    
6736
    self.needed_locks[locking.LEVEL_NODE] = []
6737
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6738

    
6739
    ignore_consistency = self.op.ignore_consistency
6740
    shutdown_timeout = self.op.shutdown_timeout
6741
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6742
                                       cleanup=False,
6743
                                       failover=True,
6744
                                       ignore_consistency=ignore_consistency,
6745
                                       shutdown_timeout=shutdown_timeout)
6746
    self.tasklets = [self._migrater]
6747

    
6748
  def DeclareLocks(self, level):
6749
    if level == locking.LEVEL_NODE:
6750
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6751
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6752
        if self.op.target_node is None:
6753
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6754
        else:
6755
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6756
                                                   self.op.target_node]
6757
        del self.recalculate_locks[locking.LEVEL_NODE]
6758
      else:
6759
        self._LockInstancesNodes()
6760

    
6761
  def BuildHooksEnv(self):
6762
    """Build hooks env.
6763

6764
    This runs on master, primary and secondary nodes of the instance.
6765

6766
    """
6767
    instance = self._migrater.instance
6768
    source_node = instance.primary_node
6769
    target_node = self.op.target_node
6770
    env = {
6771
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6772
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6773
      "OLD_PRIMARY": source_node,
6774
      "NEW_PRIMARY": target_node,
6775
      }
6776

    
6777
    if instance.disk_template in constants.DTS_INT_MIRROR:
6778
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6779
      env["NEW_SECONDARY"] = source_node
6780
    else:
6781
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6782

    
6783
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6784

    
6785
    return env
6786

    
6787
  def BuildHooksNodes(self):
6788
    """Build hooks nodes.
6789

6790
    """
6791
    instance = self._migrater.instance
6792
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6793
    return (nl, nl + [instance.primary_node])
6794

    
6795

    
6796
class LUInstanceMigrate(LogicalUnit):
6797
  """Migrate an instance.
6798

6799
  This is migration without shutting down, compared to the failover,
6800
  which is done with shutdown.
6801

6802
  """
6803
  HPATH = "instance-migrate"
6804
  HTYPE = constants.HTYPE_INSTANCE
6805
  REQ_BGL = False
6806

    
6807
  def ExpandNames(self):
6808
    self._ExpandAndLockInstance()
6809

    
6810
    if self.op.target_node is not None:
6811
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6812

    
6813
    self.needed_locks[locking.LEVEL_NODE] = []
6814
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6815

    
6816
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6817
                                       cleanup=self.op.cleanup,
6818
                                       failover=False,
6819
                                       fallback=self.op.allow_failover)
6820
    self.tasklets = [self._migrater]
6821

    
6822
  def DeclareLocks(self, level):
6823
    if level == locking.LEVEL_NODE:
6824
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6825
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6826
        if self.op.target_node is None:
6827
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6828
        else:
6829
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6830
                                                   self.op.target_node]
6831
        del self.recalculate_locks[locking.LEVEL_NODE]
6832
      else:
6833
        self._LockInstancesNodes()
6834

    
6835
  def BuildHooksEnv(self):
6836
    """Build hooks env.
6837

6838
    This runs on master, primary and secondary nodes of the instance.
6839

6840
    """
6841
    instance = self._migrater.instance
6842
    source_node = instance.primary_node
6843
    target_node = self.op.target_node
6844
    env = _BuildInstanceHookEnvByObject(self, instance)
6845
    env.update({
6846
      "MIGRATE_LIVE": self._migrater.live,
6847
      "MIGRATE_CLEANUP": self.op.cleanup,
6848
      "OLD_PRIMARY": source_node,
6849
      "NEW_PRIMARY": target_node,
6850
      })
6851

    
6852
    if instance.disk_template in constants.DTS_INT_MIRROR:
6853
      env["OLD_SECONDARY"] = target_node
6854
      env["NEW_SECONDARY"] = source_node
6855
    else:
6856
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6857

    
6858
    return env
6859

    
6860
  def BuildHooksNodes(self):
6861
    """Build hooks nodes.
6862

6863
    """
6864
    instance = self._migrater.instance
6865
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6866
    return (nl, nl + [instance.primary_node])
6867

    
6868

    
6869
class LUInstanceMove(LogicalUnit):
6870
  """Move an instance by data-copying.
6871

6872
  """
6873
  HPATH = "instance-move"
6874
  HTYPE = constants.HTYPE_INSTANCE
6875
  REQ_BGL = False
6876

    
6877
  def ExpandNames(self):
6878
    self._ExpandAndLockInstance()
6879
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6880
    self.op.target_node = target_node
6881
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6882
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6883

    
6884
  def DeclareLocks(self, level):
6885
    if level == locking.LEVEL_NODE:
6886
      self._LockInstancesNodes(primary_only=True)
6887

    
6888
  def BuildHooksEnv(self):
6889
    """Build hooks env.
6890

6891
    This runs on master, primary and secondary nodes of the instance.
6892

6893
    """
6894
    env = {
6895
      "TARGET_NODE": self.op.target_node,
6896
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6897
      }
6898
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6899
    return env
6900

    
6901
  def BuildHooksNodes(self):
6902
    """Build hooks nodes.
6903

6904
    """
6905
    nl = [
6906
      self.cfg.GetMasterNode(),
6907
      self.instance.primary_node,
6908
      self.op.target_node,
6909
      ]
6910
    return (nl, nl)
6911

    
6912
  def CheckPrereq(self):
6913
    """Check prerequisites.
6914

6915
    This checks that the instance is in the cluster.
6916

6917
    """
6918
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6919
    assert self.instance is not None, \
6920
      "Cannot retrieve locked instance %s" % self.op.instance_name
6921

    
6922
    node = self.cfg.GetNodeInfo(self.op.target_node)
6923
    assert node is not None, \
6924
      "Cannot retrieve locked node %s" % self.op.target_node
6925

    
6926
    self.target_node = target_node = node.name
6927

    
6928
    if target_node == instance.primary_node:
6929
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6930
                                 (instance.name, target_node),
6931
                                 errors.ECODE_STATE)
6932

    
6933
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6934

    
6935
    for idx, dsk in enumerate(instance.disks):
6936
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6937
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6938
                                   " cannot copy" % idx, errors.ECODE_STATE)
6939

    
6940
    _CheckNodeOnline(self, target_node)
6941
    _CheckNodeNotDrained(self, target_node)
6942
    _CheckNodeVmCapable(self, target_node)
6943

    
6944
    if instance.admin_up:
6945
      # check memory requirements on the target node
6946
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6947
                           instance.name, bep[constants.BE_MEMORY],
6948
                           instance.hypervisor)
6949
    else:
6950
      self.LogInfo("Not checking memory on the secondary node as"
6951
                   " instance will not be started")
6952

    
6953
    # check bridge existence
6954
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6955

    
6956
  def Exec(self, feedback_fn):
6957
    """Move an instance.
6958

6959
    The move is done by shutting it down on its present node, copying
6960
    the data over (slow) and starting it on the new node.
6961

6962
    """
6963
    instance = self.instance
6964

    
6965
    source_node = instance.primary_node
6966
    target_node = self.target_node
6967

    
6968
    self.LogInfo("Shutting down instance %s on source node %s",
6969
                 instance.name, source_node)
6970

    
6971
    result = self.rpc.call_instance_shutdown(source_node, instance,
6972
                                             self.op.shutdown_timeout)
6973
    msg = result.fail_msg
6974
    if msg:
6975
      if self.op.ignore_consistency:
6976
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6977
                             " Proceeding anyway. Please make sure node"
6978
                             " %s is down. Error details: %s",
6979
                             instance.name, source_node, source_node, msg)
6980
      else:
6981
        raise errors.OpExecError("Could not shutdown instance %s on"
6982
                                 " node %s: %s" %
6983
                                 (instance.name, source_node, msg))
6984

    
6985
    # create the target disks
6986
    try:
6987
      _CreateDisks(self, instance, target_node=target_node)
6988
    except errors.OpExecError:
6989
      self.LogWarning("Device creation failed, reverting...")
6990
      try:
6991
        _RemoveDisks(self, instance, target_node=target_node)
6992
      finally:
6993
        self.cfg.ReleaseDRBDMinors(instance.name)
6994
        raise
6995

    
6996
    cluster_name = self.cfg.GetClusterInfo().cluster_name
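    # The copy below is done disk by disk: assemble the (still empty) disk
    # on the target node to obtain its device path, then stream the data
    # from the source node with call_blockdev_export; any failure aborts
    # the move and the newly created disks are removed again.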
6997

    
6998
    errs = []
6999
    # activate, get path, copy the data over
7000
    for idx, disk in enumerate(instance.disks):
7001
      self.LogInfo("Copying data for disk %d", idx)
7002
      result = self.rpc.call_blockdev_assemble(target_node, disk,
7003
                                               instance.name, True, idx)
7004
      if result.fail_msg:
7005
        self.LogWarning("Can't assemble newly created disk %d: %s",
7006
                        idx, result.fail_msg)
7007
        errs.append(result.fail_msg)
7008
        break
7009
      dev_path = result.payload
7010
      result = self.rpc.call_blockdev_export(source_node, disk,
7011
                                             target_node, dev_path,
7012
                                             cluster_name)
7013
      if result.fail_msg:
7014
        self.LogWarning("Can't copy data over for disk %d: %s",
7015
                        idx, result.fail_msg)
7016
        errs.append(result.fail_msg)
7017
        break
7018

    
7019
    if errs:
7020
      self.LogWarning("Some disks failed to copy, aborting")
7021
      try:
7022
        _RemoveDisks(self, instance, target_node=target_node)
7023
      finally:
7024
        self.cfg.ReleaseDRBDMinors(instance.name)
7025
        raise errors.OpExecError("Errors during disk copy: %s" %
7026
                                 (",".join(errs),))
7027

    
7028
    instance.primary_node = target_node
7029
    self.cfg.Update(instance, feedback_fn)
7030

    
7031
    self.LogInfo("Removing the disks on the original node")
7032
    _RemoveDisks(self, instance, target_node=source_node)
7033

    
7034
    # Only start the instance if it's marked as up
7035
    if instance.admin_up:
7036
      self.LogInfo("Starting instance %s on node %s",
7037
                   instance.name, target_node)
7038

    
7039
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7040
                                           ignore_secondaries=True)
7041
      if not disks_ok:
7042
        _ShutdownInstanceDisks(self, instance)
7043
        raise errors.OpExecError("Can't activate the instance's disks")
7044

    
7045
      result = self.rpc.call_instance_start(target_node,
7046
                                            (instance, None, None), False)
7047
      msg = result.fail_msg
7048
      if msg:
7049
        _ShutdownInstanceDisks(self, instance)
7050
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7051
                                 (instance.name, target_node, msg))
7052

    
7053

    
7054
class LUNodeMigrate(LogicalUnit):
7055
  """Migrate all instances from a node.
7056

7057
  """
7058
  HPATH = "node-migrate"
7059
  HTYPE = constants.HTYPE_NODE
7060
  REQ_BGL = False
7061

    
7062
  def CheckArguments(self):
7063
    pass
7064

    
7065
  def ExpandNames(self):
7066
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7067

    
7068
    self.share_locks = _ShareAll()
7069
    self.needed_locks = {
7070
      locking.LEVEL_NODE: [self.op.node_name],
7071
      }
7072

    
7073
  def BuildHooksEnv(self):
7074
    """Build hooks env.
7075

7076
    This runs on the master node.
7077

7078
    """
7079
    return {
7080
      "NODE_NAME": self.op.node_name,
7081
      }
7082

    
7083
  def BuildHooksNodes(self):
7084
    """Build hooks nodes.
7085

7086
    """
7087
    nl = [self.cfg.GetMasterNode()]
7088
    return (nl, nl)
7089

    
7090
  def CheckPrereq(self):
7091
    pass
7092

    
7093
  def Exec(self, feedback_fn):
7094
    # Prepare jobs for migration instances
7095
    jobs = [
7096
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7097
                                 mode=self.op.mode,
7098
                                 live=self.op.live,
7099
                                 iallocator=self.op.iallocator,
7100
                                 target_node=self.op.target_node)]
7101
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7102
      ]
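    # Illustrative example (hypothetical instance names): for a node whose
    # primary instances are "inst1" and "inst2", the list built above is
    #   [[opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
    #    [opcodes.OpInstanceMigrate(instance_name="inst2", ...)]]
    # i.e. one single-opcode job per primary instance, returned below wrapped
    # in ResultWithJobs.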
7103

    
7104
    # TODO: Run iallocator in this opcode and pass correct placement options to
7105
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7106
    # running the iallocator and the actual migration, a good consistency model
7107
    # will have to be found.
7108

    
7109
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7110
            frozenset([self.op.node_name]))
7111

    
7112
    return ResultWithJobs(jobs)
7113

    
7114

    
7115
class TLMigrateInstance(Tasklet):
7116
  """Tasklet class for instance migration.
7117

7118
  @type live: boolean
7119
  @ivar live: whether the migration will be done live or non-live;
7120
      this variable is initialized only after CheckPrereq has run
7121
  @type cleanup: boolean
7122
  @ivar cleanup: Whether we are cleaning up from a failed migration
7123
  @type iallocator: string
7124
  @ivar iallocator: The iallocator used to determine target_node
7125
  @type target_node: string
7126
  @ivar target_node: If given, the target_node to reallocate the instance to
7127
  @type failover: boolean
7128
  @ivar failover: Whether operation results in failover or migration
7129
  @type fallback: boolean
7130
  @ivar fallback: Whether fallback to failover is allowed if migration not
7131
                  possible
7132
  @type ignore_consistency: boolean
7133
  @ivar ignore_consistency: Whether we should ignore consistency between source
7134
                            and target node
7135
  @type shutdown_timeout: int
7136
  @ivar shutdown_timeout: In case of failover, the timeout to use for the shutdown
7137

7138
  """
7139

    
7140
  # Constants
7141
  _MIGRATION_POLL_INTERVAL = 1      # seconds
7142
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7143

    
7144
  def __init__(self, lu, instance_name, cleanup=False,
7145
               failover=False, fallback=False,
7146
               ignore_consistency=False,
7147
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7148
    """Initializes this class.
7149

7150
    """
7151
    Tasklet.__init__(self, lu)
7152

    
7153
    # Parameters
7154
    self.instance_name = instance_name
7155
    self.cleanup = cleanup
7156
    self.live = False # will be overridden later
7157
    self.failover = failover
7158
    self.fallback = fallback
7159
    self.ignore_consistency = ignore_consistency
7160
    self.shutdown_timeout = shutdown_timeout
7161

    
7162
  def CheckPrereq(self):
7163
    """Check prerequisites.
7164

7165
    This checks that the instance is in the cluster.
7166

7167
    """
7168
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7169
    instance = self.cfg.GetInstanceInfo(instance_name)
7170
    assert instance is not None
7171
    self.instance = instance
7172

    
7173
    if (not self.cleanup and not instance.admin_up and not self.failover and
7174
        self.fallback):
7175
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7176
                      " to failover")
7177
      self.failover = True
7178

    
7179
    if instance.disk_template not in constants.DTS_MIRRORED:
7180
      if self.failover:
7181
        text = "failovers"
7182
      else:
7183
        text = "migrations"
7184
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7185
                                 " %s" % (instance.disk_template, text),
7186
                                 errors.ECODE_STATE)
7187

    
7188
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7189
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7190

    
7191
      if self.lu.op.iallocator:
7192
        self._RunAllocator()
7193
      else:
7194
        # We set self.target_node as it is required by
7195
        # BuildHooksEnv
7196
        self.target_node = self.lu.op.target_node
7197

    
7198
      # self.target_node is already populated, either directly or by the
7199
      # iallocator run
7200
      target_node = self.target_node
7201
      if self.target_node == instance.primary_node:
7202
        raise errors.OpPrereqError("Cannot migrate instance %s"
7203
                                   " to its primary (%s)" %
7204
                                   (instance.name, instance.primary_node))
7205

    
7206
      if len(self.lu.tasklets) == 1:
7207
        # It is safe to release locks only when we're the only tasklet
7208
        # in the LU
7209
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7210
                      keep=[instance.primary_node, self.target_node])
7211

    
7212
    else:
7213
      secondary_nodes = instance.secondary_nodes
7214
      if not secondary_nodes:
7215
        raise errors.ConfigurationError("No secondary node but using"
7216
                                        " %s disk template" %
7217
                                        instance.disk_template)
7218
      target_node = secondary_nodes[0]
7219
      if self.lu.op.iallocator or (self.lu.op.target_node and
7220
                                   self.lu.op.target_node != target_node):
7221
        if self.failover:
7222
          text = "failed over"
7223
        else:
7224
          text = "migrated"
7225
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7226
                                   " be %s to arbitrary nodes"
7227
                                   " (neither an iallocator nor a target"
7228
                                   " node can be passed)" %
7229
                                   (instance.disk_template, text),
7230
                                   errors.ECODE_INVAL)
7231

    
7232
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7233

    
7234
    # check memory requirements on the secondary node
7235
    if not self.failover or instance.admin_up:
7236
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7237
                           instance.name, i_be[constants.BE_MEMORY],
7238
                           instance.hypervisor)
7239
    else:
7240
      self.lu.LogInfo("Not checking memory on the secondary node as"
7241
                      " instance will not be started")
7242

    
7243
    # check bridge existence
7244
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7245

    
7246
    if not self.cleanup:
7247
      _CheckNodeNotDrained(self.lu, target_node)
7248
      if not self.failover:
7249
        result = self.rpc.call_instance_migratable(instance.primary_node,
7250
                                                   instance)
7251
        if result.fail_msg and self.fallback:
7252
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7253
                          " failover")
7254
          self.failover = True
7255
        else:
7256
          result.Raise("Can't migrate, please use failover",
7257
                       prereq=True, ecode=errors.ECODE_STATE)
7258

    
7259
    assert not (self.failover and self.cleanup)
7260

    
7261
    if not self.failover:
7262
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7263
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7264
                                   " parameters are accepted",
7265
                                   errors.ECODE_INVAL)
7266
      if self.lu.op.live is not None:
7267
        if self.lu.op.live:
7268
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7269
        else:
7270
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7271
        # reset the 'live' parameter to None so that repeated
7272
        # invocations of CheckPrereq do not raise an exception
7273
        self.lu.op.live = None
7274
      elif self.lu.op.mode is None:
7275
        # read the default value from the hypervisor
7276
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7277
                                                skip_globals=False)
7278
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7279

    
7280
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7281
    else:
7282
      # Failover is never live
7283
      self.live = False
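    # Summary of the resolution above (illustrative):
    #   op.live=True      -> op.mode = HT_MIGRATION_LIVE
    #   op.live=False     -> op.mode = HT_MIGRATION_NONLIVE
    #   neither specified -> op.mode taken from the hypervisor's
    #                        HV_MIGRATION_MODE default
    # and finally self.live = (op.mode == HT_MIGRATION_LIVE), while a
    # failover always ends up with self.live = False.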
7284

    
7285
  def _RunAllocator(self):
7286
    """Run the allocator based on input opcode.
7287

7288
    """
7289
    ial = IAllocator(self.cfg, self.rpc,
7290
                     mode=constants.IALLOCATOR_MODE_RELOC,
7291
                     name=self.instance_name,
7292
                     # TODO See why hail breaks with a single node below
7293
                     relocate_from=[self.instance.primary_node,
7294
                                    self.instance.primary_node],
7295
                     )
7296

    
7297
    ial.Run(self.lu.op.iallocator)
7298

    
7299
    if not ial.success:
7300
      raise errors.OpPrereqError("Can't compute nodes using"
7301
                                 " iallocator '%s': %s" %
7302
                                 (self.lu.op.iallocator, ial.info),
7303
                                 errors.ECODE_NORES)
7304
    if len(ial.result) != ial.required_nodes:
7305
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7306
                                 " of nodes (%s), required %s" %
7307
                                 (self.lu.op.iallocator, len(ial.result),
7308
                                  ial.required_nodes), errors.ECODE_FAULT)
7309
    self.target_node = ial.result[0]
7310
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7311
                 self.instance_name, self.lu.op.iallocator,
7312
                 utils.CommaJoin(ial.result))
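    # Illustrative sketch (hypothetical allocator and node names): a
    # relocation request for an instance whose primary is "node1" might
    # return ial.result == ["node3"] from an allocator such as "hail", in
    # which case self.target_node becomes "node3" and the message above
    # reports the selection.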
7313

    
7314
  def _WaitUntilSync(self):
7315
    """Poll with custom rpc for disk sync.
7316

7317
    This uses our own step-based rpc call.
7318

7319
    """
7320
    self.feedback_fn("* wait until resync is done")
7321
    all_done = False
7322
    while not all_done:
7323
      all_done = True
7324
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7325
                                            self.nodes_ip,
7326
                                            self.instance.disks)
7327
      min_percent = 100
7328
      for node, nres in result.items():
7329
        nres.Raise("Cannot resync disks on node %s" % node)
7330
        node_done, node_percent = nres.payload
7331
        all_done = all_done and node_done
7332
        if node_percent is not None:
7333
          min_percent = min(min_percent, node_percent)
7334
      if not all_done:
7335
        if min_percent < 100:
7336
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7337
        time.sleep(2)
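    # Illustrative example of one poll iteration (hypothetical payloads):
    # per-node payloads of (done, percent) such as
    #   node1 -> (True, 100), node2 -> (False, 87.5)
    # leave all_done False and min_percent 87.5, so "- progress: 87.5%" is
    # printed and the loop sleeps before polling again.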
7338

    
7339
  def _EnsureSecondary(self, node):
7340
    """Demote a node to secondary.
7341

7342
    """
7343
    self.feedback_fn("* switching node %s to secondary mode" % node)
7344

    
7345
    for dev in self.instance.disks:
7346
      self.cfg.SetDiskID(dev, node)
7347

    
7348
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7349
                                          self.instance.disks)
7350
    result.Raise("Cannot change disk to secondary on node %s" % node)
7351

    
7352
  def _GoStandalone(self):
7353
    """Disconnect from the network.
7354

7355
    """
7356
    self.feedback_fn("* changing into standalone mode")
7357
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7358
                                               self.instance.disks)
7359
    for node, nres in result.items():
7360
      nres.Raise("Cannot disconnect disks node %s" % node)
7361

    
7362
  def _GoReconnect(self, multimaster):
7363
    """Reconnect to the network.
7364

7365
    """
7366
    if multimaster:
7367
      msg = "dual-master"
7368
    else:
7369
      msg = "single-master"
7370
    self.feedback_fn("* changing disks into %s mode" % msg)
7371
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7372
                                           self.instance.disks,
7373
                                           self.instance.name, multimaster)
7374
    for node, nres in result.items():
7375
      nres.Raise("Cannot change disks config on node %s" % node)
7376

    
7377
  def _ExecCleanup(self):
7378
    """Try to cleanup after a failed migration.
7379

7380
    The cleanup is done by:
7381
      - check that the instance is running only on one node
7382
        (and update the config if needed)
7383
      - change disks on its secondary node to secondary
7384
      - wait until disks are fully synchronized
7385
      - disconnect from the network
7386
      - change disks into single-master mode
7387
      - wait again until disks are fully synchronized
7388

7389
    """
7390
    instance = self.instance
7391
    target_node = self.target_node
7392
    source_node = self.source_node
7393

    
7394
    # check running on only one node
7395
    self.feedback_fn("* checking where the instance actually runs"
7396
                     " (if this hangs, the hypervisor might be in"
7397
                     " a bad state)")
7398
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7399
    for node, result in ins_l.items():
7400
      result.Raise("Can't contact node %s" % node)
7401

    
7402
    runningon_source = instance.name in ins_l[source_node].payload
7403
    runningon_target = instance.name in ins_l[target_node].payload
7404

    
7405
    if runningon_source and runningon_target:
7406
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7407
                               " or the hypervisor is confused; you will have"
7408
                               " to ensure manually that it runs only on one"
7409
                               " and restart this operation")
7410

    
7411
    if not (runningon_source or runningon_target):
7412
      raise errors.OpExecError("Instance does not seem to be running at all;"
7413
                               " in this case it's safer to repair by"
7414
                               " running 'gnt-instance stop' to ensure disk"
7415
                               " shutdown, and then restarting it")
7416

    
7417
    if runningon_target:
7418
      # the migration has actually succeeded, we need to update the config
7419
      self.feedback_fn("* instance running on secondary node (%s),"
7420
                       " updating config" % target_node)
7421
      instance.primary_node = target_node
7422
      self.cfg.Update(instance, self.feedback_fn)
7423
      demoted_node = source_node
7424
    else:
7425
      self.feedback_fn("* instance confirmed to be running on its"
7426
                       " primary node (%s)" % source_node)
7427
      demoted_node = target_node
7428

    
7429
    if instance.disk_template in constants.DTS_INT_MIRROR:
7430
      self._EnsureSecondary(demoted_node)
7431
      try:
7432
        self._WaitUntilSync()
7433
      except errors.OpExecError:
7434
        # we ignore errors here, since if the device is standalone, it
7435
        # won't be able to sync
7436
        pass
7437
      self._GoStandalone()
7438
      self._GoReconnect(False)
7439
      self._WaitUntilSync()
7440

    
7441
    self.feedback_fn("* done")
7442

    
7443
  def _RevertDiskStatus(self):
7444
    """Try to revert the disk status after a failed migration.
7445

7446
    """
7447
    target_node = self.target_node
7448
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7449
      return
7450

    
7451
    try:
7452
      self._EnsureSecondary(target_node)
7453
      self._GoStandalone()
7454
      self._GoReconnect(False)
7455
      self._WaitUntilSync()
7456
    except errors.OpExecError, err:
7457
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7458
                         " please try to recover the instance manually;"
7459
                         " error '%s'" % str(err))
7460

    
7461
  def _AbortMigration(self):
7462
    """Call the hypervisor code to abort a started migration.
7463

7464
    """
7465
    instance = self.instance
7466
    target_node = self.target_node
7467
    source_node = self.source_node
7468
    migration_info = self.migration_info
7469

    
7470
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7471
                                                                 instance,
7472
                                                                 migration_info,
7473
                                                                 False)
7474
    abort_msg = abort_result.fail_msg
7475
    if abort_msg:
7476
      logging.error("Aborting migration failed on target node %s: %s",
7477
                    target_node, abort_msg)
7478
      # Don't raise an exception here, as we still have to try to revert the
7479
      # disk status, even if this step failed.
7480

    
7481
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7482
        instance, False, self.live)
7483
    abort_msg = abort_result.fail_msg
7484
    if abort_msg:
7485
      logging.error("Aborting migration failed on source node %s: %s",
7486
                    source_node, abort_msg)
7487

    
7488
  def _ExecMigration(self):
7489
    """Migrate an instance.
7490

7491
    The migrate is done by:
7492
      - change the disks into dual-master mode
7493
      - wait until disks are fully synchronized again
7494
      - migrate the instance
7495
      - change disks on the new secondary node (the old primary) to secondary
7496
      - wait until disks are fully synchronized
7497
      - change disks into single-master mode
7498

7499
    """
7500
    instance = self.instance
7501
    target_node = self.target_node
7502
    source_node = self.source_node
7503

    
7504
    # Check for hypervisor version mismatch and warn the user.
7505
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7506
                                       None, self.instance.hypervisor)
7507
    src_info = nodeinfo[source_node]
7508
    dst_info = nodeinfo[target_node]
7509

    
7510
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7511
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7512
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7513
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7514
      if src_version != dst_version:
7515
        self.feedback_fn("* warning: hypervisor version mismatch between"
7516
                         " source (%s) and target (%s) node" %
7517
                         (src_version, dst_version))
7518

    
7519
    self.feedback_fn("* checking disk consistency between source and target")
7520
    for dev in instance.disks:
7521
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7522
        raise errors.OpExecError("Disk %s is degraded or not fully"
7523
                                 " synchronized on target node,"
7524
                                 " aborting migration" % dev.iv_name)
7525

    
7526
    # First get the migration information from the remote node
7527
    result = self.rpc.call_migration_info(source_node, instance)
7528
    msg = result.fail_msg
7529
    if msg:
7530
      log_err = ("Failed fetching source migration information from %s: %s" %
7531
                 (source_node, msg))
7532
      logging.error(log_err)
7533
      raise errors.OpExecError(log_err)
7534

    
7535
    self.migration_info = migration_info = result.payload
7536

    
7537
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7538
      # Then switch the disks to master/master mode
7539
      self._EnsureSecondary(target_node)
7540
      self._GoStandalone()
7541
      self._GoReconnect(True)
7542
      self._WaitUntilSync()
7543

    
7544
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7545
    result = self.rpc.call_accept_instance(target_node,
7546
                                           instance,
7547
                                           migration_info,
7548
                                           self.nodes_ip[target_node])
7549

    
7550
    msg = result.fail_msg
7551
    if msg:
7552
      logging.error("Instance pre-migration failed, trying to revert"
7553
                    " disk status: %s", msg)
7554
      self.feedback_fn("Pre-migration failed, aborting")
7555
      self._AbortMigration()
7556
      self._RevertDiskStatus()
7557
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7558
                               (instance.name, msg))
7559

    
7560
    self.feedback_fn("* migrating instance to %s" % target_node)
7561
    result = self.rpc.call_instance_migrate(source_node, instance,
7562
                                            self.nodes_ip[target_node],
7563
                                            self.live)
7564
    msg = result.fail_msg
7565
    if msg:
7566
      logging.error("Instance migration failed, trying to revert"
7567
                    " disk status: %s", msg)
7568
      self.feedback_fn("Migration failed, aborting")
7569
      self._AbortMigration()
7570
      self._RevertDiskStatus()
7571
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7572
                               (instance.name, msg))
7573

    
7574
    self.feedback_fn("* starting memory transfer")
7575
    last_feedback = time.time()
7576
    while True:
7577
      result = self.rpc.call_instance_get_migration_status(source_node,
7578
                                                           instance)
7579
      msg = result.fail_msg
7580
      ms = result.payload   # MigrationStatus instance
7581
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7582
        logging.error("Instance migration failed, trying to revert"
7583
                      " disk status: %s", msg)
7584
        self.feedback_fn("Migration failed, aborting")
7585
        self._AbortMigration()
7586
        self._RevertDiskStatus()
7587
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7588
                                 (instance.name, msg))
7589

    
7590
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7591
        self.feedback_fn("* memory transfer complete")
7592
        break
7593

    
7594
      if (utils.TimeoutExpired(last_feedback,
7595
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7596
          ms.transferred_ram is not None):
7597
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7598
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7599
        last_feedback = time.time()
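        # Illustrative progress computation (hypothetical values): with
        # transferred_ram=1536 and total_ram=2048 the feedback line above
        # reports "memory transfer progress: 75.00 %".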
7600

    
7601
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7602

    
7603
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7604
                                                           instance,
7605
                                                           True,
7606
                                                           self.live)
7607
    msg = result.fail_msg
7608
    if msg:
7609
      logging.error("Instance migration succeeded, but finalization failed"
7610
                    " on the source node: %s", msg)
7611
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7612
                               msg)
7613

    
7614
    instance.primary_node = target_node
7615

    
7616
    # distribute new instance config to the other nodes
7617
    self.cfg.Update(instance, self.feedback_fn)
7618

    
7619
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7620
                                                           instance,
7621
                                                           migration_info,
7622
                                                           True)
7623
    msg = result.fail_msg
7624
    if msg:
7625
      logging.error("Instance migration succeeded, but finalization failed"
7626
                    " on the target node: %s", msg)
7627
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7628
                               msg)
7629

    
7630
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7631
      self._EnsureSecondary(source_node)
7632
      self._WaitUntilSync()
7633
      self._GoStandalone()
7634
      self._GoReconnect(False)
7635
      self._WaitUntilSync()
7636

    
7637
    self.feedback_fn("* done")
7638

    
7639
  def _ExecFailover(self):
7640
    """Failover an instance.
7641

7642
    The failover is done by shutting it down on its present node and
7643
    starting it on the secondary.
7644

7645
    """
7646
    instance = self.instance
7647
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7648

    
7649
    source_node = instance.primary_node
7650
    target_node = self.target_node
7651

    
7652
    if instance.admin_up:
7653
      self.feedback_fn("* checking disk consistency between source and target")
7654
      for dev in instance.disks:
7655
        # for drbd, these are drbd over lvm
7656
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7657
          if primary_node.offline:
7658
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7659
                             " target node %s" %
7660
                             (primary_node.name, dev.iv_name, target_node))
7661
          elif not self.ignore_consistency:
7662
            raise errors.OpExecError("Disk %s is degraded on target node,"
7663
                                     " aborting failover" % dev.iv_name)
7664
    else:
7665
      self.feedback_fn("* not checking disk consistency as instance is not"
7666
                       " running")
7667

    
7668
    self.feedback_fn("* shutting down instance on source node")
7669
    logging.info("Shutting down instance %s on node %s",
7670
                 instance.name, source_node)
7671

    
7672
    result = self.rpc.call_instance_shutdown(source_node, instance,
7673
                                             self.shutdown_timeout)
7674
    msg = result.fail_msg
7675
    if msg:
7676
      if self.ignore_consistency or primary_node.offline:
7677
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7678
                           " proceeding anyway; please make sure node"
7679
                           " %s is down; error details: %s",
7680
                           instance.name, source_node, source_node, msg)
7681
      else:
7682
        raise errors.OpExecError("Could not shutdown instance %s on"
7683
                                 " node %s: %s" %
7684
                                 (instance.name, source_node, msg))
7685

    
7686
    self.feedback_fn("* deactivating the instance's disks on source node")
7687
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7688
      raise errors.OpExecError("Can't shut down the instance's disks")
7689

    
7690
    instance.primary_node = target_node
7691
    # distribute new instance config to the other nodes
7692
    self.cfg.Update(instance, self.feedback_fn)
7693

    
7694
    # Only start the instance if it's marked as up
7695
    if instance.admin_up:
7696
      self.feedback_fn("* activating the instance's disks on target node %s" %
7697
                       target_node)
7698
      logging.info("Starting instance %s on node %s",
7699
                   instance.name, target_node)
7700

    
7701
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7702
                                           ignore_secondaries=True)
7703
      if not disks_ok:
7704
        _ShutdownInstanceDisks(self.lu, instance)
7705
        raise errors.OpExecError("Can't activate the instance's disks")
7706

    
7707
      self.feedback_fn("* starting the instance on the target node %s" %
7708
                       target_node)
7709
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
7710
                                            False)
7711
      msg = result.fail_msg
7712
      if msg:
7713
        _ShutdownInstanceDisks(self.lu, instance)
7714
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7715
                                 (instance.name, target_node, msg))
7716

    
7717
  def Exec(self, feedback_fn):
7718
    """Perform the migration.
7719

7720
    """
7721
    self.feedback_fn = feedback_fn
7722
    self.source_node = self.instance.primary_node
7723

    
7724
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7725
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7726
      self.target_node = self.instance.secondary_nodes[0]
7727
      # Otherwise self.target_node has been populated either
7728
      # directly, or through an iallocator.
7729

    
7730
    self.all_nodes = [self.source_node, self.target_node]
7731
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7732
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7733

    
7734
    if self.failover:
7735
      feedback_fn("Failover instance %s" % self.instance.name)
7736
      self._ExecFailover()
7737
    else:
7738
      feedback_fn("Migrating instance %s" % self.instance.name)
7739

    
7740
      if self.cleanup:
7741
        return self._ExecCleanup()
7742
      else:
7743
        return self._ExecMigration()
7744

    
7745

    
7746
def _CreateBlockDev(lu, node, instance, device, force_create,
7747
                    info, force_open):
7748
  """Create a tree of block devices on a given node.
7749

7750
  If this device type has to be created on secondaries, create it and
7751
  all its children.
7752

7753
  If not, just recurse to children keeping the same 'force' value.
7754

7755
  @param lu: the lu on whose behalf we execute
7756
  @param node: the node on which to create the device
7757
  @type instance: L{objects.Instance}
7758
  @param instance: the instance which owns the device
7759
  @type device: L{objects.Disk}
7760
  @param device: the device to create
7761
  @type force_create: boolean
7762
  @param force_create: whether to force creation of this device; this
7763
      will be changed to True whenever we find a device which has
7764
      CreateOnSecondary() attribute
7765
  @param info: the extra 'metadata' we should attach to the device
7766
      (this will be represented as a LVM tag)
7767
  @type force_open: boolean
7768
  @param force_open: this parameter will be passed to the
7769
      L{backend.BlockdevCreate} function where it specifies
7770
      whether we run on primary or not, and it affects both
7771
      the child assembly and the device's own Open() execution
7772

7773
  """
7774
  if device.CreateOnSecondary():
7775
    force_create = True
7776

    
7777
  if device.children:
7778
    for child in device.children:
7779
      _CreateBlockDev(lu, node, instance, child, force_create,
7780
                      info, force_open)
7781

    
7782
  if not force_create:
7783
    return
7784

    
7785
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7786

    
7787

    
7788
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7789
  """Create a single block device on a given node.
7790

7791
  This will not recurse over children of the device, so they must be
7792
  created in advance.
7793

7794
  @param lu: the lu on whose behalf we execute
7795
  @param node: the node on which to create the device
7796
  @type instance: L{objects.Instance}
7797
  @param instance: the instance which owns the device
7798
  @type device: L{objects.Disk}
7799
  @param device: the device to create
7800
  @param info: the extra 'metadata' we should attach to the device
7801
      (this will be represented as a LVM tag)
7802
  @type force_open: boolean
7803
  @param force_open: this parameter will be passed to the
7804
      L{backend.BlockdevCreate} function where it specifies
7805
      whether we run on primary or not, and it affects both
7806
      the child assembly and the device's own Open() execution
7807

7808
  """
7809
  lu.cfg.SetDiskID(device, node)
7810
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7811
                                       instance.name, force_open, info)
7812
  result.Raise("Can't create block device %s on"
7813
               " node %s for instance %s" % (device, node, instance.name))
7814
  if device.physical_id is None:
7815
    device.physical_id = result.payload
7816

    
7817

    
7818
def _GenerateUniqueNames(lu, exts):
7819
  """Generate a suitable LV name.
7820

7821
  This will generate a logical volume name for the given instance.
7822

7823
  """
7824
  results = []
7825
  for val in exts:
7826
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7827
    results.append("%s%s" % (new_id, val))
7828
  return results
7829

    
7830

    
7831
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7832
                         iv_name, p_minor, s_minor):
7833
  """Generate a drbd8 device complete with its children.
7834

7835
  """
7836
  assert len(vgnames) == len(names) == 2
7837
  port = lu.cfg.AllocatePort()
7838
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7839
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7840
                          logical_id=(vgnames[0], names[0]))
7841
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
7842
                          logical_id=(vgnames[1], names[1]))
7843
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7844
                          logical_id=(primary, secondary, port,
7845
                                      p_minor, s_minor,
7846
                                      shared_secret),
7847
                          children=[dev_data, dev_meta],
7848
                          iv_name=iv_name)
7849
  return drbd_dev
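  # Illustrative result (hypothetical VG name, names and minors): for
  # size=10240 this returns an LD_DRBD8 disk whose children are two LD_LV
  # devices, e.g.
  #   data: ("xenvg", "<uuid>.disk0_data"), size 10240
  #   meta: ("xenvg", "<uuid>.disk0_meta"), size DRBD_META_SIZE (128)
  # with logical_id (primary, secondary, port, p_minor, s_minor, secret).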
7850

    
7851

    
7852
def _GenerateDiskTemplate(lu, template_name,
7853
                          instance_name, primary_node,
7854
                          secondary_nodes, disk_info,
7855
                          file_storage_dir, file_driver,
7856
                          base_index, feedback_fn):
7857
  """Generate the entire disk layout for a given template type.
7858

7859
  """
7860
  # TODO: compute space requirements
7861

    
7862
  vgname = lu.cfg.GetVGName()
7863
  disk_count = len(disk_info)
7864
  disks = []
7865
  if template_name == constants.DT_DISKLESS:
7866
    pass
7867
  elif template_name == constants.DT_PLAIN:
7868
    if len(secondary_nodes) != 0:
7869
      raise errors.ProgrammerError("Wrong template configuration")
7870

    
7871
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7872
                                      for i in range(disk_count)])
7873
    for idx, disk in enumerate(disk_info):
7874
      disk_index = idx + base_index
7875
      vg = disk.get(constants.IDISK_VG, vgname)
7876
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7877
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7878
                              size=disk[constants.IDISK_SIZE],
7879
                              logical_id=(vg, names[idx]),
7880
                              iv_name="disk/%d" % disk_index,
7881
                              mode=disk[constants.IDISK_MODE])
7882
      disks.append(disk_dev)
7883
  elif template_name == constants.DT_DRBD8:
7884
    if len(secondary_nodes) != 1:
7885
      raise errors.ProgrammerError("Wrong template configuration")
7886
    remote_node = secondary_nodes[0]
7887
    minors = lu.cfg.AllocateDRBDMinor(
7888
      [primary_node, remote_node] * len(disk_info), instance_name)
7889

    
7890
    names = []
7891
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7892
                                               for i in range(disk_count)]):
7893
      names.append(lv_prefix + "_data")
7894
      names.append(lv_prefix + "_meta")
7895
    for idx, disk in enumerate(disk_info):
7896
      disk_index = idx + base_index
7897
      data_vg = disk.get(constants.IDISK_VG, vgname)
7898
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7899
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7900
                                      disk[constants.IDISK_SIZE],
7901
                                      [data_vg, meta_vg],
7902
                                      names[idx * 2:idx * 2 + 2],
7903
                                      "disk/%d" % disk_index,
7904
                                      minors[idx * 2], minors[idx * 2 + 1])
7905
      disk_dev.mode = disk[constants.IDISK_MODE]
7906
      disks.append(disk_dev)
7907
  elif template_name == constants.DT_FILE:
7908
    if len(secondary_nodes) != 0:
7909
      raise errors.ProgrammerError("Wrong template configuration")
7910

    
7911
    opcodes.RequireFileStorage()
7912

    
7913
    for idx, disk in enumerate(disk_info):
7914
      disk_index = idx + base_index
7915
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7916
                              size=disk[constants.IDISK_SIZE],
7917
                              iv_name="disk/%d" % disk_index,
7918
                              logical_id=(file_driver,
7919
                                          "%s/disk%d" % (file_storage_dir,
7920
                                                         disk_index)),
7921
                              mode=disk[constants.IDISK_MODE])
7922
      disks.append(disk_dev)
7923
  elif template_name == constants.DT_SHARED_FILE:
7924
    if len(secondary_nodes) != 0:
7925
      raise errors.ProgrammerError("Wrong template configuration")
7926

    
7927
    opcodes.RequireSharedFileStorage()
7928

    
7929
    for idx, disk in enumerate(disk_info):
7930
      disk_index = idx + base_index
7931
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7932
                              size=disk[constants.IDISK_SIZE],
7933
                              iv_name="disk/%d" % disk_index,
7934
                              logical_id=(file_driver,
7935
                                          "%s/disk%d" % (file_storage_dir,
7936
                                                         disk_index)),
7937
                              mode=disk[constants.IDISK_MODE])
7938
      disks.append(disk_dev)
7939
  elif template_name == constants.DT_BLOCK:
7940
    if len(secondary_nodes) != 0:
7941
      raise errors.ProgrammerError("Wrong template configuration")
7942

    
7943
    for idx, disk in enumerate(disk_info):
7944
      disk_index = idx + base_index
7945
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7946
                              size=disk[constants.IDISK_SIZE],
7947
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7948
                                          disk[constants.IDISK_ADOPT]),
7949
                              iv_name="disk/%d" % disk_index,
7950
                              mode=disk[constants.IDISK_MODE])
7951
      disks.append(disk_dev)
7952

    
7953
  else:
7954
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7955
  return disks
7956

    
7957

    
7958
def _GetInstanceInfoText(instance):
7959
  """Compute that text that should be added to the disk's metadata.
7960

7961
  """
7962
  return "originstname+%s" % instance.name
7963

    
7964

    
7965
def _CalcEta(time_taken, written, total_size):
7966
  """Calculates the ETA based on size written and total size.
7967

7968
  @param time_taken: The time taken so far
7969
  @param written: amount written so far
7970
  @param total_size: The total size of data to be written
7971
  @return: The remaining time in seconds
7972

7973
  """
7974
  avg_time = time_taken / float(written)
7975
  return (total_size - written) * avg_time
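  # Worked example (illustrative): 10240 MiB of 30720 MiB written in 120s
  # gives avg_time = 120 / 10240 and an ETA of (30720 - 10240) * avg_time,
  # i.e. 240 seconds.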
7976

    
7977

    
7978
def _WipeDisks(lu, instance):
7979
  """Wipes instance disks.
7980

7981
  @type lu: L{LogicalUnit}
7982
  @param lu: the logical unit on whose behalf we execute
7983
  @type instance: L{objects.Instance}
7984
  @param instance: the instance whose disks we should create
7985
  @return: the success of the wipe
7986

7987
  """
7988
  node = instance.primary_node
7989

    
7990
  for device in instance.disks:
7991
    lu.cfg.SetDiskID(device, node)
7992

    
7993
  logging.info("Pause sync of instance %s disks", instance.name)
7994
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7995

    
7996
  for idx, success in enumerate(result.payload):
7997
    if not success:
7998
      logging.warn("pause-sync of instance %s for disks %d failed",
7999
                   instance.name, idx)
8000

    
8001
  try:
8002
    for idx, device in enumerate(instance.disks):
8003
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8004
      # at most MAX_WIPE_CHUNK
8005
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8006
                            constants.MIN_WIPE_CHUNK_PERCENT)
8007
      # we _must_ make this an int, otherwise rounding errors will
8008
      # occur
8009
      wipe_chunk_size = int(wipe_chunk_size)
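      # Worked example (illustrative, assuming MIN_WIPE_CHUNK_PERCENT=10 and
      # MAX_WIPE_CHUNK=1024; the actual values live in constants.py): a
      # 51200 MiB disk uses min(1024, 51200 / 100.0 * 10) = 1024 MiB chunks,
      # while a 5120 MiB disk uses min(1024, 512) = 512 MiB chunks.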
8010

    
8011
      lu.LogInfo("* Wiping disk %d", idx)
8012
      logging.info("Wiping disk %d for instance %s, node %s using"
8013
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8014

    
8015
      offset = 0
8016
      size = device.size
8017
      last_output = 0
8018
      start_time = time.time()
8019

    
8020
      while offset < size:
8021
        wipe_size = min(wipe_chunk_size, size - offset)
8022
        logging.debug("Wiping disk %d, offset %s, chunk %s",
8023
                      idx, offset, wipe_size)
8024
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8025
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
8026
                     (idx, offset, wipe_size))
8027
        now = time.time()
8028
        offset += wipe_size
8029
        if now - last_output >= 60:
8030
          eta = _CalcEta(now - start_time, offset, size)
8031
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
8032
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
8033
          last_output = now
8034
  finally:
8035
    logging.info("Resume sync of instance %s disks", instance.name)
8036

    
8037
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8038

    
8039
    for idx, success in enumerate(result.payload):
8040
      if not success:
8041
        lu.LogWarning("Resume sync of disk %d failed, please have a"
8042
                      " look at the status and troubleshoot the issue", idx)
8043
        logging.warn("resume-sync of instance %s for disks %d failed",
8044
                     instance.name, idx)
8045

    
8046

    
8047
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8048
  """Create all disks for an instance.
8049

8050
  This abstracts away some work from AddInstance.
8051

8052
  @type lu: L{LogicalUnit}
8053
  @param lu: the logical unit on whose behalf we execute
8054
  @type instance: L{objects.Instance}
8055
  @param instance: the instance whose disks we should create
8056
  @type to_skip: list
8057
  @param to_skip: list of indices to skip
8058
  @type target_node: string
8059
  @param target_node: if passed, overrides the target node for creation
8060
  @rtype: boolean
8061
  @return: the success of the creation
8062

8063
  """
8064
  info = _GetInstanceInfoText(instance)
8065
  if target_node is None:
8066
    pnode = instance.primary_node
8067
    all_nodes = instance.all_nodes
8068
  else:
8069
    pnode = target_node
8070
    all_nodes = [pnode]
8071

    
8072
  if instance.disk_template in constants.DTS_FILEBASED:
8073
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8074
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8075

    
8076
    result.Raise("Failed to create directory '%s' on"
8077
                 " node %s" % (file_storage_dir, pnode))
8078

    
8079
  # Note: this needs to be kept in sync with adding of disks in
8080
  # LUInstanceSetParams
8081
  for idx, device in enumerate(instance.disks):
8082
    if to_skip and idx in to_skip:
8083
      continue
8084
    logging.info("Creating volume %s for instance %s",
8085
                 device.iv_name, instance.name)
8086
    #HARDCODE
8087
    for node in all_nodes:
8088
      f_create = node == pnode
8089
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8090

    
8091

    
8092
def _RemoveDisks(lu, instance, target_node=None):
8093
  """Remove all disks for an instance.
8094

8095
  This abstracts away some work from `AddInstance()` and
8096
  `RemoveInstance()`. Note that in case some of the devices couldn't
8097
  be removed, the removal will continue with the other ones (compare
8098
  with `_CreateDisks()`).
8099

8100
  @type lu: L{LogicalUnit}
8101
  @param lu: the logical unit on whose behalf we execute
8102
  @type instance: L{objects.Instance}
8103
  @param instance: the instance whose disks we should remove
8104
  @type target_node: string
8105
  @param target_node: used to override the node on which to remove the disks
8106
  @rtype: boolean
8107
  @return: the success of the removal
8108

8109
  """
8110
  logging.info("Removing block devices for instance %s", instance.name)
8111

    
8112
  all_result = True
8113
  for device in instance.disks:
8114
    if target_node:
8115
      edata = [(target_node, device)]
8116
    else:
8117
      edata = device.ComputeNodeTree(instance.primary_node)
8118
    for node, disk in edata:
8119
      lu.cfg.SetDiskID(disk, node)
8120
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8121
      if msg:
8122
        lu.LogWarning("Could not remove block device %s on node %s,"
8123
                      " continuing anyway: %s", device.iv_name, node, msg)
8124
        all_result = False
8125

    
8126
  if instance.disk_template == constants.DT_FILE:
8127
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8128
    if target_node:
8129
      tgt = target_node
8130
    else:
8131
      tgt = instance.primary_node
8132
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8133
    if result.fail_msg:
8134
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8135
                    file_storage_dir, instance.primary_node, result.fail_msg)
8136
      all_result = False
8137

    
8138
  return all_result
8139

    
8140

    
8141
def _ComputeDiskSizePerVG(disk_template, disks):
8142
  """Compute disk size requirements in the volume group
8143

8144
  """
8145
  def _compute(disks, payload):
8146
    """Universal algorithm.
8147

8148
    """
8149
    vgs = {}
8150
    for disk in disks:
8151
      vgs[disk[constants.IDISK_VG]] = \
8152
        vgs.get(disk[constants.IDISK_VG], 0) + \
          disk[constants.IDISK_SIZE] + payload
8153

    
8154
    return vgs
8155

    
8156
  # Required free disk space as a function of disk and swap space
8157
  req_size_dict = {
8158
    constants.DT_DISKLESS: {},
8159
    constants.DT_PLAIN: _compute(disks, 0),
8160
    # 128 MB are added for drbd metadata for each disk
8161
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8162
    constants.DT_FILE: {},
8163
    constants.DT_SHARED_FILE: {},
8164
  }
8165

    
8166
  if disk_template not in req_size_dict:
8167
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8168
                                 " is unknown" % disk_template)
8169

    
8170
  return req_size_dict[disk_template]
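  # Worked example (illustrative): two 1024 MiB disks, both in vg "xenvg",
  # with the drbd disk template yield
  #   _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)
  #   == {"xenvg": (1024 + 128) + (1024 + 128)} == {"xenvg": 2304}
  # since 128 MB of DRBD metadata is added per disk.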
8171

    
8172

    
8173
def _ComputeDiskSize(disk_template, disks):
8174
  """Compute disk size requirements in the volume group
8175

8176
  """
8177
  # Required free disk space as a function of disk and swap space
8178
  req_size_dict = {
8179
    constants.DT_DISKLESS: None,
8180
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8181
    # 128 MB are added for drbd metadata for each disk
8182
    constants.DT_DRBD8:
8183
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8184
    constants.DT_FILE: None,
8185
    constants.DT_SHARED_FILE: 0,
8186
    constants.DT_BLOCK: 0,
8187
  }
8188

    
8189
  if disk_template not in req_size_dict:
8190
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8191
                                 " is unknown" % disk_template)
8192

    
8193
  return req_size_dict[disk_template]
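  # Worked example (illustrative): for disks of 2048 and 1024 MiB,
  #   _ComputeDiskSize(constants.DT_PLAIN, disks) == 3072
  #   _ComputeDiskSize(constants.DT_DRBD8, disks)
  #   == (2048 + 128) + (1024 + 128) == 3328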
8194

    
8195

    
8196
def _FilterVmNodes(lu, nodenames):
8197
  """Filters out non-vm_capable nodes from a list.
8198

8199
  @type lu: L{LogicalUnit}
8200
  @param lu: the logical unit for which we check
8201
  @type nodenames: list
8202
  @param nodenames: the list of nodes on which we should check
8203
  @rtype: list
8204
  @return: the list of vm-capable nodes
8205

8206
  """
8207
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8208
  return [name for name in nodenames if name not in vm_nodes]
8209

    
8210

    
8211
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8212
  """Hypervisor parameter validation.
8213

8214
  This function abstracts the hypervisor parameter validation to be
8215
  used in both instance create and instance modify.
8216

8217
  @type lu: L{LogicalUnit}
8218
  @param lu: the logical unit for which we check
8219
  @type nodenames: list
8220
  @param nodenames: the list of nodes on which we should check
8221
  @type hvname: string
8222
  @param hvname: the name of the hypervisor we should use
8223
  @type hvparams: dict
8224
  @param hvparams: the parameters which we need to check
8225
  @raise errors.OpPrereqError: if the parameters are not valid
8226

8227
  """
8228
  nodenames = _FilterVmNodes(lu, nodenames)
8229

    
8230
  cluster = lu.cfg.GetClusterInfo()
8231
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8232

    
8233
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8234
  for node in nodenames:
8235
    info = hvinfo[node]
8236
    if info.offline:
8237
      continue
8238
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8239

    
8240

    
8241
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8242
  """OS parameters validation.
8243

8244
  @type lu: L{LogicalUnit}
8245
  @param lu: the logical unit for which we check
8246
  @type required: boolean
8247
  @param required: whether the validation should fail if the OS is not
8248
      found
8249
  @type nodenames: list
8250
  @param nodenames: the list of nodes on which we should check
8251
  @type osname: string
8252
  @param osname: the name of the OS we should use
8253
  @type osparams: dict
8254
  @param osparams: the parameters which we need to check
8255
  @raise errors.OpPrereqError: if the parameters are not valid
8256

8257
  """
8258
  nodenames = _FilterVmNodes(lu, nodenames)
8259
  result = lu.rpc.call_os_validate(nodenames, required, osname,
8260
                                   [constants.OS_VALIDATE_PARAMETERS],
8261
                                   osparams)
8262
  for node, nres in result.items():
8263
    # we don't check for offline cases since this should be run only
8264
    # against the master node and/or an instance's nodes
8265
    nres.Raise("OS Parameters validation failed on node %s" % node)
8266
    if not nres.payload:
8267
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8268
                 osname, node)
8269

    
8270

    
8271
class LUInstanceCreate(LogicalUnit):
8272
  """Create an instance.
8273

8274
  """
8275
  HPATH = "instance-add"
8276
  HTYPE = constants.HTYPE_INSTANCE
8277
  REQ_BGL = False
8278

    
8279
  def CheckArguments(self):
8280
    """Check arguments.
8281

8282
    """
8283
    # do not require name_check to ease forward/backward compatibility
8284
    # for tools
8285
    if self.op.no_install and self.op.start:
8286
      self.LogInfo("No-installation mode selected, disabling startup")
8287
      self.op.start = False
8288
    # validate/normalize the instance name
8289
    self.op.instance_name = \
8290
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8291

    
8292
    if self.op.ip_check and not self.op.name_check:
8293
      # TODO: make the ip check more flexible and not depend on the name check
8294
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8295
                                 " check", errors.ECODE_INVAL)
8296

    
8297
    # check nics' parameter names
8298
    for nic in self.op.nics:
8299
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8300

    
8301
    # check disks. parameter names and consistent adopt/no-adopt strategy
8302
    has_adopt = has_no_adopt = False
8303
    for disk in self.op.disks:
8304
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8305
      if constants.IDISK_ADOPT in disk:
8306
        has_adopt = True
8307
      else:
8308
        has_no_adopt = True
8309
    if has_adopt and has_no_adopt:
8310
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8311
                                 errors.ECODE_INVAL)
8312
    if has_adopt:
8313
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8314
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8315
                                   " '%s' disk template" %
8316
                                   self.op.disk_template,
8317
                                   errors.ECODE_INVAL)
8318
      if self.op.iallocator is not None:
8319
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8320
                                   " iallocator script", errors.ECODE_INVAL)
8321
      if self.op.mode == constants.INSTANCE_IMPORT:
8322
        raise errors.OpPrereqError("Disk adoption not allowed for"
8323
                                   " instance import", errors.ECODE_INVAL)
8324
    else:
8325
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8326
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8327
                                   " but no 'adopt' parameter given" %
8328
                                   self.op.disk_template,
8329
                                   errors.ECODE_INVAL)
8330

    
8331
    self.adopt_disks = has_adopt
8332

    
8333
    # instance name verification
8334
    if self.op.name_check:
8335
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8336
      self.op.instance_name = self.hostname1.name
8337
      # used in CheckPrereq for ip ping check
8338
      self.check_ip = self.hostname1.ip
8339
    else:
8340
      self.check_ip = None
8341

    
8342
    # file storage checks
8343
    if (self.op.file_driver and
8344
        not self.op.file_driver in constants.FILE_DRIVER):
8345
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8346
                                 self.op.file_driver, errors.ECODE_INVAL)
8347

    
8348
    if self.op.disk_template == constants.DT_FILE:
8349
      opcodes.RequireFileStorage()
8350
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8351
      opcodes.RequireSharedFileStorage()
8352

    
8353
    ### Node/iallocator related checks
8354
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8355

    
8356
    if self.op.pnode is not None:
8357
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8358
        if self.op.snode is None:
8359
          raise errors.OpPrereqError("The networked disk templates need"
8360
                                     " a mirror node", errors.ECODE_INVAL)
8361
      elif self.op.snode:
8362
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8363
                        " template")
8364
        self.op.snode = None
8365

    
8366
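    # the cluster domain secret is used further down to verify the
    # remote-import handshake and the signed source X509 CA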
    self._cds = _GetClusterDomainSecret()
8367

    
8368
    if self.op.mode == constants.INSTANCE_IMPORT:
8369
      # On import force_variant must be True, because if we forced it at
8370
      # initial install, our only chance when importing it back is that it
8371
      # works again!
8372
      self.op.force_variant = True
8373

    
8374
      if self.op.no_install:
8375
        self.LogInfo("No-installation mode has no effect during import")
8376

    
8377
    elif self.op.mode == constants.INSTANCE_CREATE:
8378
      if self.op.os_type is None:
8379
        raise errors.OpPrereqError("No guest OS specified",
8380
                                   errors.ECODE_INVAL)
8381
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8382
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8383
                                   " installation" % self.op.os_type,
8384
                                   errors.ECODE_STATE)
8385
      if self.op.disk_template is None:
8386
        raise errors.OpPrereqError("No disk template specified",
8387
                                   errors.ECODE_INVAL)
8388

    
8389
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8390
      # Check handshake to ensure both clusters have the same domain secret
8391
      src_handshake = self.op.source_handshake
8392
      if not src_handshake:
8393
        raise errors.OpPrereqError("Missing source handshake",
8394
                                   errors.ECODE_INVAL)
8395

    
8396
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8397
                                                           src_handshake)
8398
      if errmsg:
8399
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8400
                                   errors.ECODE_INVAL)
8401

    
8402
      # Load and check source CA
8403
      self.source_x509_ca_pem = self.op.source_x509_ca
8404
      if not self.source_x509_ca_pem:
8405
        raise errors.OpPrereqError("Missing source X509 CA",
8406
                                   errors.ECODE_INVAL)
8407

    
8408
      try:
8409
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8410
                                                    self._cds)
8411
      except OpenSSL.crypto.Error, err:
8412
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8413
                                   (err, ), errors.ECODE_INVAL)
8414

    
8415
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8416
      if errcode is not None:
8417
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8418
                                   errors.ECODE_INVAL)
8419

    
8420
      self.source_x509_ca = cert
8421

    
8422
      src_instance_name = self.op.source_instance_name
8423
      if not src_instance_name:
8424
        raise errors.OpPrereqError("Missing source instance name",
8425
                                   errors.ECODE_INVAL)
8426

    
8427
      self.source_instance_name = \
8428
          netutils.GetHostname(name=src_instance_name).name
8429

    
8430
    else:
8431
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8432
                                 self.op.mode, errors.ECODE_INVAL)
8433

    
8434
  def ExpandNames(self):
8435
    """ExpandNames for CreateInstance.
8436

8437
    Figure out the right locks for instance creation.
8438

8439
    """
8440
    self.needed_locks = {}
8441

    
8442
    instance_name = self.op.instance_name
8443
    # this is just a preventive check, but someone might still add this
8444
    # instance in the meantime, and creation will fail at lock-add time
8445
    if instance_name in self.cfg.GetInstanceList():
8446
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8447
                                 instance_name, errors.ECODE_EXISTS)
8448

    
8449
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8450

    
8451
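    # with an iallocator we do not know the target node(s) yet, so all
    # nodes must be locked; otherwise only the given primary (and optional
    # secondary) node needs to be locked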
    if self.op.iallocator:
8452
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8453
    else:
8454
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8455
      nodelist = [self.op.pnode]
8456
      if self.op.snode is not None:
8457
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8458
        nodelist.append(self.op.snode)
8459
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8460

    
8461
    # in case of import lock the source node too
8462
    if self.op.mode == constants.INSTANCE_IMPORT:
8463
      src_node = self.op.src_node
8464
      src_path = self.op.src_path
8465

    
8466
      if src_path is None:
8467
        self.op.src_path = src_path = self.op.instance_name
8468

    
8469
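      # without an explicit source node we have to lock all nodes, so that
      # _ReadExportInfo can later search them for the export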
      if src_node is None:
8470
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8471
        self.op.src_node = None
8472
        if os.path.isabs(src_path):
8473
          raise errors.OpPrereqError("Importing an instance from a path"
8474
                                     " requires a source node option",
8475
                                     errors.ECODE_INVAL)
8476
      else:
8477
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8478
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8479
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8480
        if not os.path.isabs(src_path):
8481
          self.op.src_path = src_path = \
8482
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8483

    
8484
  def _RunAllocator(self):
8485
    """Run the allocator based on input opcode.
8486

8487
    """
8488
    nics = [n.ToDict() for n in self.nics]
8489
    ial = IAllocator(self.cfg, self.rpc,
8490
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8491
                     name=self.op.instance_name,
8492
                     disk_template=self.op.disk_template,
8493
                     tags=self.op.tags,
8494
                     os=self.op.os_type,
8495
                     vcpus=self.be_full[constants.BE_VCPUS],
8496
                     memory=self.be_full[constants.BE_MEMORY],
8497
                     disks=self.disks,
8498
                     nics=nics,
8499
                     hypervisor=self.op.hypervisor,
8500
                     )
8501

    
8502
    ial.Run(self.op.iallocator)
8503

    
8504
    if not ial.success:
8505
      raise errors.OpPrereqError("Can't compute nodes using"
8506
                                 " iallocator '%s': %s" %
8507
                                 (self.op.iallocator, ial.info),
8508
                                 errors.ECODE_NORES)
8509
    if len(ial.result) != ial.required_nodes:
8510
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8511
                                 " of nodes (%s), required %s" %
8512
                                 (self.op.iallocator, len(ial.result),
8513
                                  ial.required_nodes), errors.ECODE_FAULT)
8514
    self.op.pnode = ial.result[0]
8515
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8516
                 self.op.instance_name, self.op.iallocator,
8517
                 utils.CommaJoin(ial.result))
8518
    if ial.required_nodes == 2:
8519
      self.op.snode = ial.result[1]
8520

    
8521
  def BuildHooksEnv(self):
8522
    """Build hooks env.
8523

8524
    This runs on master, primary and secondary nodes of the instance.
8525

8526
    """
8527
    env = {
8528
      "ADD_MODE": self.op.mode,
8529
      }
8530
    if self.op.mode == constants.INSTANCE_IMPORT:
8531
      env["SRC_NODE"] = self.op.src_node
8532
      env["SRC_PATH"] = self.op.src_path
8533
      env["SRC_IMAGES"] = self.src_images
8534

    
8535
    env.update(_BuildInstanceHookEnv(
8536
      name=self.op.instance_name,
8537
      primary_node=self.op.pnode,
8538
      secondary_nodes=self.secondaries,
8539
      status=self.op.start,
8540
      os_type=self.op.os_type,
8541
      memory=self.be_full[constants.BE_MEMORY],
8542
      vcpus=self.be_full[constants.BE_VCPUS],
8543
      nics=_NICListToTuple(self, self.nics),
8544
      disk_template=self.op.disk_template,
8545
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8546
             for d in self.disks],
8547
      bep=self.be_full,
8548
      hvp=self.hv_full,
8549
      hypervisor_name=self.op.hypervisor,
8550
      tags=self.op.tags,
8551
    ))
8552

    
8553
    return env
8554

    
8555
  def BuildHooksNodes(self):
8556
    """Build hooks nodes.
8557

8558
    """
8559
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8560
    return nl, nl
8561

    
8562
  def _ReadExportInfo(self):
8563
    """Reads the export information from disk.
8564

8565
    It will override the opcode source node and path with the actual
8566
    information, if these two were not specified before.
8567

8568
    @return: the export information
8569

8570
    """
8571
    assert self.op.mode == constants.INSTANCE_IMPORT
8572

    
8573
    src_node = self.op.src_node
8574
    src_path = self.op.src_path
8575

    
8576
    if src_node is None:
8577
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8578
      exp_list = self.rpc.call_export_list(locked_nodes)
8579
      found = False
8580
      for node in exp_list:
8581
        if exp_list[node].fail_msg:
8582
          continue
8583
        if src_path in exp_list[node].payload:
8584
          found = True
8585
          self.op.src_node = src_node = node
8586
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8587
                                                       src_path)
8588
          break
8589
      if not found:
8590
        raise errors.OpPrereqError("No export found for relative path %s" %
8591
                                    src_path, errors.ECODE_INVAL)
8592

    
8593
    _CheckNodeOnline(self, src_node)
8594
    result = self.rpc.call_export_info(src_node, src_path)
8595
    result.Raise("No export or invalid export found in dir %s" % src_path)
8596

    
8597
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8598
    if not export_info.has_section(constants.INISECT_EXP):
8599
      raise errors.ProgrammerError("Corrupted export config",
8600
                                   errors.ECODE_ENVIRON)
8601

    
8602
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8603
    if (int(ei_version) != constants.EXPORT_VERSION):
8604
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8605
                                 (ei_version, constants.EXPORT_VERSION),
8606
                                 errors.ECODE_ENVIRON)
8607
    return export_info
8608

    
8609
  def _ReadExportParams(self, einfo):
8610
    """Use export parameters as defaults.
8611

8612
    In case the opcode doesn't specify (as in override) some instance
8613
    parameters, then try to use them from the export information, if
8614
    that declares them.
8615

8616
    """
8617
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8618

    
8619
    if self.op.disk_template is None:
8620
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8621
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8622
                                          "disk_template")
8623
        if self.op.disk_template not in constants.DISK_TEMPLATES:
8624
          raise errors.OpPrereqError("Disk template specified in configuration"
8625
                                     " file is not one of the allowed values:"
8626
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
8627
      else:
8628
        raise errors.OpPrereqError("No disk template specified and the export"
8629
                                   " is missing the disk_template information",
8630
                                   errors.ECODE_INVAL)
8631

    
8632
    if not self.op.disks:
8633
      disks = []
8634
      # TODO: import the disk iv_name too
8635
      for idx in range(constants.MAX_DISKS):
8636
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8637
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8638
          disks.append({constants.IDISK_SIZE: disk_sz})
8639
      self.op.disks = disks
8640
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
8641
        raise errors.OpPrereqError("No disk info specified and the export"
8642
                                   " is missing the disk information",
8643
                                   errors.ECODE_INVAL)
8644

    
8645
    if not self.op.nics:
8646
      nics = []
8647
      for idx in range(constants.MAX_NICS):
8648
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8649
          ndict = {}
8650
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8651
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8652
            ndict[name] = v
8653
          nics.append(ndict)
8654
        else:
8655
          break
8656
      self.op.nics = nics
8657

    
8658
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8659
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8660

    
8661
    if (self.op.hypervisor is None and
8662
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8663
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8664

    
8665
    if einfo.has_section(constants.INISECT_HYP):
8666
      # use the export parameters but do not override the ones
8667
      # specified by the user
8668
      for name, value in einfo.items(constants.INISECT_HYP):
8669
        if name not in self.op.hvparams:
8670
          self.op.hvparams[name] = value
8671

    
8672
    if einfo.has_section(constants.INISECT_BEP):
8673
      # use the parameters, without overriding
8674
      for name, value in einfo.items(constants.INISECT_BEP):
8675
        if name not in self.op.beparams:
8676
          self.op.beparams[name] = value
8677
    else:
8678
      # try to read the parameters old style, from the main section
8679
      for name in constants.BES_PARAMETERS:
8680
        if (name not in self.op.beparams and
8681
            einfo.has_option(constants.INISECT_INS, name)):
8682
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8683

    
8684
    if einfo.has_section(constants.INISECT_OSP):
8685
      # use the parameters, without overriding
8686
      for name, value in einfo.items(constants.INISECT_OSP):
8687
        if name not in self.op.osparams:
8688
          self.op.osparams[name] = value
8689

    
8690
  def _RevertToDefaults(self, cluster):
8691
    """Revert the instance parameters to the default values.
8692

8693
    """
8694
    # hvparams
8695
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8696
    for name in self.op.hvparams.keys():
8697
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8698
        del self.op.hvparams[name]
8699
    # beparams
8700
    be_defs = cluster.SimpleFillBE({})
8701
    for name in self.op.beparams.keys():
8702
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8703
        del self.op.beparams[name]
8704
    # nic params
8705
    nic_defs = cluster.SimpleFillNIC({})
8706
    for nic in self.op.nics:
8707
      for name in constants.NICS_PARAMETERS:
8708
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8709
          del nic[name]
8710
    # osparams
8711
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8712
    for name in self.op.osparams.keys():
8713
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8714
        del self.op.osparams[name]
8715

    
8716
  def _CalculateFileStorageDir(self):
8717
    """Calculate final instance file storage dir.
8718

8719
    """
8720
    # file storage dir calculation/check
8721
    self.instance_file_storage_dir = None
8722
    if self.op.disk_template in constants.DTS_FILEBASED:
8723
      # build the full file storage dir path
8724
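      # the final path is
      # <cluster (shared) file storage dir>[/<file_storage_dir>]/<instance name>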
      joinargs = []
8725

    
8726
      if self.op.disk_template == constants.DT_SHARED_FILE:
8727
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8728
      else:
8729
        get_fsd_fn = self.cfg.GetFileStorageDir
8730

    
8731
      cfg_storagedir = get_fsd_fn()
8732
      if not cfg_storagedir:
8733
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8734
      joinargs.append(cfg_storagedir)
8735

    
8736
      if self.op.file_storage_dir is not None:
8737
        joinargs.append(self.op.file_storage_dir)
8738

    
8739
      joinargs.append(self.op.instance_name)
8740

    
8741
      # pylint: disable=W0142
8742
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8743

    
8744
  def CheckPrereq(self):
8745
    """Check prerequisites.
8746

8747
    """
8748
    self._CalculateFileStorageDir()
8749

    
8750
    if self.op.mode == constants.INSTANCE_IMPORT:
8751
      export_info = self._ReadExportInfo()
8752
      self._ReadExportParams(export_info)
8753

    
8754
    if (not self.cfg.GetVGName() and
8755
        self.op.disk_template not in constants.DTS_NOT_LVM):
8756
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8757
                                 " instances", errors.ECODE_STATE)
8758

    
8759
    if (self.op.hypervisor is None or
8760
        self.op.hypervisor == constants.VALUE_AUTO):
8761
      self.op.hypervisor = self.cfg.GetHypervisorType()
8762

    
8763
    cluster = self.cfg.GetClusterInfo()
8764
    enabled_hvs = cluster.enabled_hypervisors
8765
    if self.op.hypervisor not in enabled_hvs:
8766
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8767
                                 " cluster (%s)" % (self.op.hypervisor,
8768
                                  ",".join(enabled_hvs)),
8769
                                 errors.ECODE_STATE)
8770

    
8771
    # Check tag validity
8772
    for tag in self.op.tags:
8773
      objects.TaggableObject.ValidateTag(tag)
8774

    
8775
    # check hypervisor parameter syntax (locally)
8776
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8777
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8778
                                      self.op.hvparams)
8779
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8780
    hv_type.CheckParameterSyntax(filled_hvp)
8781
    self.hv_full = filled_hvp
8782
    # check that we don't specify global parameters on an instance
8783
    _CheckGlobalHvParams(self.op.hvparams)
8784

    
8785
    # fill and remember the beparams dict
8786
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
8787
    for param, value in self.op.beparams.iteritems():
8788
      if value == constants.VALUE_AUTO:
8789
        self.op.beparams[param] = default_beparams[param]
8790
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8791
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8792

    
8793
    # build os parameters
8794
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8795

    
8796
    # now that hvp/bep are in final format, let's reset to defaults,
8797
    # if told to do so
8798
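    # (parameters equal to the cluster defaults are dropped from the
    # instance, so it keeps following future changes to those defaults)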
    if self.op.identify_defaults:
8799
      self._RevertToDefaults(cluster)
8800

    
8801
    # NIC buildup
8802
    self.nics = []
8803
    for idx, nic in enumerate(self.op.nics):
8804
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8805
      nic_mode = nic_mode_req
8806
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8807
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8808

    
8809
      # in routed mode, for the first nic, the default ip is 'auto'
8810
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8811
        default_ip_mode = constants.VALUE_AUTO
8812
      else:
8813
        default_ip_mode = constants.VALUE_NONE
8814

    
8815
      # ip validity checks
8816
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8817
      if ip is None or ip.lower() == constants.VALUE_NONE:
8818
        nic_ip = None
8819
      elif ip.lower() == constants.VALUE_AUTO:
8820
        if not self.op.name_check:
8821
          raise errors.OpPrereqError("IP address set to auto but name checks"
8822
                                     " have been skipped",
8823
                                     errors.ECODE_INVAL)
8824
        nic_ip = self.hostname1.ip
8825
      else:
8826
        if not netutils.IPAddress.IsValid(ip):
8827
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8828
                                     errors.ECODE_INVAL)
8829
        nic_ip = ip
8830

    
8831
      # TODO: check the ip address for uniqueness
8832
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8833
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8834
                                   errors.ECODE_INVAL)
8835

    
8836
      # MAC address verification
8837
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8838
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8839
        mac = utils.NormalizeAndValidateMac(mac)
8840

    
8841
        try:
8842
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8843
        except errors.ReservationError:
8844
          raise errors.OpPrereqError("MAC address %s already in use"
8845
                                     " in cluster" % mac,
8846
                                     errors.ECODE_NOTUNIQUE)
8847

    
8848
      #  Build nic parameters
8849
      link = nic.get(constants.INIC_LINK, None)
8850
      if link == constants.VALUE_AUTO:
8851
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
8852
      nicparams = {}
8853
      if nic_mode_req:
8854
        nicparams[constants.NIC_MODE] = nic_mode
8855
      if link:
8856
        nicparams[constants.NIC_LINK] = link
8857

    
8858
      check_params = cluster.SimpleFillNIC(nicparams)
8859
      objects.NIC.CheckParameterSyntax(check_params)
8860
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8861

    
8862
    # disk checks/pre-build
8863
    default_vg = self.cfg.GetVGName()
8864
    self.disks = []
8865
    for disk in self.op.disks:
8866
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8867
      if mode not in constants.DISK_ACCESS_SET:
8868
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8869
                                   mode, errors.ECODE_INVAL)
8870
      size = disk.get(constants.IDISK_SIZE, None)
8871
      if size is None:
8872
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8873
      try:
8874
        size = int(size)
8875
      except (TypeError, ValueError):
8876
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8877
                                   errors.ECODE_INVAL)
8878

    
8879
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8880
      new_disk = {
8881
        constants.IDISK_SIZE: size,
8882
        constants.IDISK_MODE: mode,
8883
        constants.IDISK_VG: data_vg,
8884
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8885
        }
8886
      if constants.IDISK_ADOPT in disk:
8887
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8888
      self.disks.append(new_disk)
8889

    
8890
    if self.op.mode == constants.INSTANCE_IMPORT:
8891
      disk_images = []
8892
      for idx in range(len(self.disks)):
8893
        option = "disk%d_dump" % idx
8894
        if export_info.has_option(constants.INISECT_INS, option):
8895
          # FIXME: are the old os-es, disk sizes, etc. useful?
8896
          export_name = export_info.get(constants.INISECT_INS, option)
8897
          image = utils.PathJoin(self.op.src_path, export_name)
8898
          disk_images.append(image)
8899
        else:
8900
          disk_images.append(False)
8901

    
8902
      self.src_images = disk_images
8903

    
8904
      old_name = export_info.get(constants.INISECT_INS, "name")
8905
      if self.op.instance_name == old_name:
8906
        for idx, nic in enumerate(self.nics):
8907
          if nic.mac == constants.VALUE_AUTO:
8908
            nic_mac_ini = "nic%d_mac" % idx
8909
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8910

    
8911
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8912

    
8913
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8914
    if self.op.ip_check:
8915
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8916
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8917
                                   (self.check_ip, self.op.instance_name),
8918
                                   errors.ECODE_NOTUNIQUE)
8919

    
8920
    #### mac address generation
8921
    # By generating here the mac address both the allocator and the hooks get
8922
    # the real final mac address rather than the 'auto' or 'generate' value.
8923
    # There is a race condition between the generation and the instance object
8924
    # creation, which means that we know the mac is valid now, but we're not
8925
    # sure it will be when we actually add the instance. If things go bad
8926
    # adding the instance will abort because of a duplicate mac, and the
8927
    # creation job will fail.
8928
    for nic in self.nics:
8929
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8930
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8931

    
8932
    #### allocator run
8933

    
8934
    if self.op.iallocator is not None:
8935
      self._RunAllocator()
8936

    
8937
    #### node related checks
8938

    
8939
    # check primary node
8940
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8941
    assert self.pnode is not None, \
8942
      "Cannot retrieve locked node %s" % self.op.pnode
8943
    if pnode.offline:
8944
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8945
                                 pnode.name, errors.ECODE_STATE)
8946
    if pnode.drained:
8947
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8948
                                 pnode.name, errors.ECODE_STATE)
8949
    if not pnode.vm_capable:
8950
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8951
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8952

    
8953
    self.secondaries = []
8954

    
8955
    # mirror node verification
8956
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8957
      if self.op.snode == pnode.name:
8958
        raise errors.OpPrereqError("The secondary node cannot be the"
8959
                                   " primary node", errors.ECODE_INVAL)
8960
      _CheckNodeOnline(self, self.op.snode)
8961
      _CheckNodeNotDrained(self, self.op.snode)
8962
      _CheckNodeVmCapable(self, self.op.snode)
8963
      self.secondaries.append(self.op.snode)
8964

    
8965
    nodenames = [pnode.name] + self.secondaries
8966

    
8967
    if not self.adopt_disks:
8968
      # Check lv size requirements, if not adopting
8969
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8970
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8971

    
8972
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8973
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8974
                                disk[constants.IDISK_ADOPT])
8975
                     for disk in self.disks])
8976
      if len(all_lvs) != len(self.disks):
8977
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8978
                                   errors.ECODE_INVAL)
8979
      for lv_name in all_lvs:
8980
        try:
8981
          # FIXME: lv_name here is "vg/lv"; we need to ensure that other
          # calls to ReserveLV use the same syntax
8983
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8984
        except errors.ReservationError:
8985
          raise errors.OpPrereqError("LV named %s used by another instance" %
8986
                                     lv_name, errors.ECODE_NOTUNIQUE)
8987

    
8988
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8989
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8990

    
8991
      node_lvs = self.rpc.call_lv_list([pnode.name],
8992
                                       vg_names.payload.keys())[pnode.name]
8993
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8994
      node_lvs = node_lvs.payload
8995

    
8996
      delta = all_lvs.difference(node_lvs.keys())
8997
      if delta:
8998
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8999
                                   utils.CommaJoin(delta),
9000
                                   errors.ECODE_INVAL)
9001
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9002
      if online_lvs:
9003
        raise errors.OpPrereqError("Online logical volumes found, cannot"
9004
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
9005
                                   errors.ECODE_STATE)
9006
      # update the size of each disk based on what was found
9007
      for dsk in self.disks:
9008
        dsk[constants.IDISK_SIZE] = \
9009
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9010
                                        dsk[constants.IDISK_ADOPT])][0]))
9011

    
9012
    elif self.op.disk_template == constants.DT_BLOCK:
9013
      # Normalize and de-duplicate device paths
9014
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9015
                       for disk in self.disks])
9016
      if len(all_disks) != len(self.disks):
9017
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
9018
                                   errors.ECODE_INVAL)
9019
      baddisks = [d for d in all_disks
9020
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9021
      if baddisks:
9022
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9023
                                   " cannot be adopted" %
9024
                                   (", ".join(baddisks),
9025
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
9026
                                   errors.ECODE_INVAL)
9027

    
9028
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
9029
                                            list(all_disks))[pnode.name]
9030
      node_disks.Raise("Cannot get block device information from node %s" %
9031
                       pnode.name)
9032
      node_disks = node_disks.payload
9033
      delta = all_disks.difference(node_disks.keys())
9034
      if delta:
9035
        raise errors.OpPrereqError("Missing block device(s): %s" %
9036
                                   utils.CommaJoin(delta),
9037
                                   errors.ECODE_INVAL)
9038
      for dsk in self.disks:
9039
        dsk[constants.IDISK_SIZE] = \
9040
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9041

    
9042
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9043

    
9044
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9045
    # check OS parameters (remotely)
9046
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9047

    
9048
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9049

    
9050
    # memory check on primary node
9051
    if self.op.start:
9052
      _CheckNodeFreeMemory(self, self.pnode.name,
9053
                           "creating instance %s" % self.op.instance_name,
9054
                           self.be_full[constants.BE_MEMORY],
9055
                           self.op.hypervisor)
9056

    
9057
    self.dry_run_result = list(nodenames)
9058

    
9059
  def Exec(self, feedback_fn):
9060
    """Create and add the instance to the cluster.
9061

9062
    """
9063
    instance = self.op.instance_name
9064
    pnode_name = self.pnode.name
9065

    
9066
    ht_kind = self.op.hypervisor
9067
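    # hypervisors listed in HTS_REQ_PORT need a cluster-wide unique TCP
    # port (e.g. for console access), taken from the cluster port pool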
    if ht_kind in constants.HTS_REQ_PORT:
9068
      network_port = self.cfg.AllocatePort()
9069
    else:
9070
      network_port = None
9071

    
9072
    disks = _GenerateDiskTemplate(self,
9073
                                  self.op.disk_template,
9074
                                  instance, pnode_name,
9075
                                  self.secondaries,
9076
                                  self.disks,
9077
                                  self.instance_file_storage_dir,
9078
                                  self.op.file_driver,
9079
                                  0,
9080
                                  feedback_fn)
9081

    
9082
    iobj = objects.Instance(name=instance, os=self.op.os_type,
9083
                            primary_node=pnode_name,
9084
                            nics=self.nics, disks=disks,
9085
                            disk_template=self.op.disk_template,
9086
                            admin_up=False,
9087
                            network_port=network_port,
9088
                            beparams=self.op.beparams,
9089
                            hvparams=self.op.hvparams,
9090
                            hypervisor=self.op.hypervisor,
9091
                            osparams=self.op.osparams,
9092
                            )
9093

    
9094
    if self.op.tags:
9095
      for tag in self.op.tags:
9096
        iobj.AddTag(tag)
9097

    
9098
    if self.adopt_disks:
9099
      if self.op.disk_template == constants.DT_PLAIN:
9100
        # rename LVs to the newly-generated names; we need to construct
9101
        # 'fake' LV disks with the old data, plus the new unique_id
9102
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9103
        rename_to = []
9104
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9105
          rename_to.append(t_dsk.logical_id)
9106
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9107
          self.cfg.SetDiskID(t_dsk, pnode_name)
9108
        result = self.rpc.call_blockdev_rename(pnode_name,
9109
                                               zip(tmp_disks, rename_to))
9110
        result.Raise("Failed to rename adoped LVs")
9111
    else:
9112
      feedback_fn("* creating instance disks...")
9113
      try:
9114
        _CreateDisks(self, iobj)
9115
      except errors.OpExecError:
9116
        self.LogWarning("Device creation failed, reverting...")
9117
        try:
9118
          _RemoveDisks(self, iobj)
9119
        finally:
9120
          self.cfg.ReleaseDRBDMinors(instance)
9121
          raise
9122

    
9123
    feedback_fn("adding instance %s to cluster config" % instance)
9124

    
9125
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9126

    
9127
    # Declare that we don't want to remove the instance lock anymore, as we've
9128
    # added the instance to the config
9129
    del self.remove_locks[locking.LEVEL_INSTANCE]
9130

    
9131
    if self.op.mode == constants.INSTANCE_IMPORT:
9132
      # Release unused nodes
9133
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9134
    else:
9135
      # Release all nodes
9136
      _ReleaseLocks(self, locking.LEVEL_NODE)
9137

    
9138
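    # newly created (non-adopted) disks are optionally wiped, depending on
    # the cluster-wide prealloc_wipe_disks setting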
    disk_abort = False
9139
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9140
      feedback_fn("* wiping instance disks...")
9141
      try:
9142
        _WipeDisks(self, iobj)
9143
      except errors.OpExecError, err:
9144
        logging.exception("Wiping disks failed")
9145
        self.LogWarning("Wiping instance disks failed (%s)", err)
9146
        disk_abort = True
9147

    
9148
    if disk_abort:
9149
      # Something is already wrong with the disks, don't do anything else
9150
      pass
9151
    elif self.op.wait_for_sync:
9152
      disk_abort = not _WaitForSync(self, iobj)
9153
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9154
      # make sure the disks are not degraded (still sync-ing is ok)
9155
      feedback_fn("* checking mirrors status")
9156
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9157
    else:
9158
      disk_abort = False
9159

    
9160
    if disk_abort:
9161
      _RemoveDisks(self, iobj)
9162
      self.cfg.RemoveInstance(iobj.name)
9163
      # Make sure the instance lock gets removed
9164
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9165
      raise errors.OpExecError("There are some degraded disks for"
9166
                               " this instance")
9167

    
9168
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9169
      if self.op.mode == constants.INSTANCE_CREATE:
9170
        if not self.op.no_install:
9171
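          # when not waiting for sync, pause the initial DRBD resync while
          # the OS is installed, so that it does not compete with the
          # installation for disk I/O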
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9172
                        not self.op.wait_for_sync)
9173
          if pause_sync:
9174
            feedback_fn("* pausing disk sync to install instance OS")
9175
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9176
                                                              iobj.disks, True)
9177
            for idx, success in enumerate(result.payload):
9178
              if not success:
9179
                logging.warn("pause-sync of instance %s for disk %d failed",
9180
                             instance, idx)
9181

    
9182
          feedback_fn("* running the instance OS create scripts...")
9183
          # FIXME: pass debug option from opcode to backend
9184
          os_add_result = \
9185
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9186
                                          self.op.debug_level)
9187
          if pause_sync:
9188
            feedback_fn("* resuming disk sync")
9189
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9190
                                                              iobj.disks, False)
9191
            for idx, success in enumerate(result.payload):
9192
              if not success:
9193
                logging.warn("resume-sync of instance %s for disk %d failed",
9194
                             instance, idx)
9195

    
9196
          os_add_result.Raise("Could not add os for instance %s"
9197
                              " on node %s" % (instance, pnode_name))
9198

    
9199
      elif self.op.mode == constants.INSTANCE_IMPORT:
9200
        feedback_fn("* running the instance OS import scripts...")
9201

    
9202
        transfers = []
9203

    
9204
        for idx, image in enumerate(self.src_images):
9205
          if not image:
9206
            continue
9207

    
9208
          # FIXME: pass debug option from opcode to backend
9209
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9210
                                             constants.IEIO_FILE, (image, ),
9211
                                             constants.IEIO_SCRIPT,
9212
                                             (iobj.disks[idx], idx),
9213
                                             None)
9214
          transfers.append(dt)
9215

    
9216
        import_result = \
9217
          masterd.instance.TransferInstanceData(self, feedback_fn,
9218
                                                self.op.src_node, pnode_name,
9219
                                                self.pnode.secondary_ip,
9220
                                                iobj, transfers)
9221
        if not compat.all(import_result):
9222
          self.LogWarning("Some disks for instance %s on node %s were not"
9223
                          " imported successfully" % (instance, pnode_name))
9224

    
9225
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9226
        feedback_fn("* preparing remote import...")
9227
        # The source cluster will stop the instance before attempting to make a
9228
        # connection. In some cases stopping an instance can take a long time,
9229
        # hence the shutdown timeout is added to the connection timeout.
9230
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9231
                           self.op.source_shutdown_timeout)
9232
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9233

    
9234
        assert iobj.primary_node == self.pnode.name
9235
        disk_results = \
9236
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9237
                                        self.source_x509_ca,
9238
                                        self._cds, timeouts)
9239
        if not compat.all(disk_results):
9240
          # TODO: Should the instance still be started, even if some disks
9241
          # failed to import (valid for local imports, too)?
9242
          self.LogWarning("Some disks for instance %s on node %s were not"
9243
                          " imported successfully" % (instance, pnode_name))
9244

    
9245
        # Run rename script on newly imported instance
9246
        assert iobj.name == instance
9247
        feedback_fn("Running rename script for %s" % instance)
9248
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9249
                                                   self.source_instance_name,
9250
                                                   self.op.debug_level)
9251
        if result.fail_msg:
9252
          self.LogWarning("Failed to run rename script for %s on node"
9253
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9254

    
9255
      else:
9256
        # also checked in the prereq part
9257
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9258
                                     % self.op.mode)
9259

    
9260
    if self.op.start:
9261
      iobj.admin_up = True
9262
      self.cfg.Update(iobj, feedback_fn)
9263
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9264
      feedback_fn("* starting instance...")
9265
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9266
                                            False)
9267
      result.Raise("Could not start instance")
9268

    
9269
    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
9273
  """Connect to an instance's console.
9274

9275
  This is somewhat special in that it returns the command line that
9276
  you need to run on the master node in order to connect to the
9277
  console.
9278

9279
  """
9280
  REQ_BGL = False
9281

    
9282
  def ExpandNames(self):
9283
    self._ExpandAndLockInstance()
9284

    
9285
  def CheckPrereq(self):
9286
    """Check prerequisites.
9287

9288
    This checks that the instance is in the cluster.
9289

9290
    """
9291
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9292
    assert self.instance is not None, \
9293
      "Cannot retrieve locked instance %s" % self.op.instance_name
9294
    _CheckNodeOnline(self, self.instance.primary_node)
9295

    
9296
  def Exec(self, feedback_fn):
9297
    """Connect to the console of an instance
9298

9299
    """
9300
    instance = self.instance
9301
    node = instance.primary_node
9302

    
9303
    node_insts = self.rpc.call_instance_list([node],
9304
                                             [instance.hypervisor])[node]
9305
    node_insts.Raise("Can't get node information from %s" % node)
9306

    
9307
    if instance.name not in node_insts.payload:
9308
      if instance.admin_up:
9309
        state = constants.INSTST_ERRORDOWN
9310
      else:
9311
        state = constants.INSTST_ADMINDOWN
9312
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9313
                               (instance.name, state))
9314

    
9315
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9316

    
9317
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
9342
  """Replace the disks of an instance.
9343

9344
  """
9345
  HPATH = "mirrors-replace"
9346
  HTYPE = constants.HTYPE_INSTANCE
9347
  REQ_BGL = False
9348

    
9349
  def CheckArguments(self):
9350
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9351
                                  self.op.iallocator)
9352

    
9353
  def ExpandNames(self):
9354
    self._ExpandAndLockInstance()
9355

    
9356
    assert locking.LEVEL_NODE not in self.needed_locks
9357
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9358

    
9359
    assert self.op.iallocator is None or self.op.remote_node is None, \
9360
      "Conflicting options"
9361

    
9362
    if self.op.remote_node is not None:
9363
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9364

    
9365
      # Warning: do not remove the locking of the new secondary here
9366
      # unless DRBD8.AddChildren is changed to work in parallel;
9367
      # currently it doesn't since parallel invocations of
9368
      # FindUnusedMinor will conflict
9369
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9370
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9371
    else:
9372
      self.needed_locks[locking.LEVEL_NODE] = []
9373
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9374

    
9375
      if self.op.iallocator is not None:
9376
        # iallocator will select a new node in the same group
9377
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9378

    
9379
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9380
                                   self.op.iallocator, self.op.remote_node,
9381
                                   self.op.disks, False, self.op.early_release)
9382

    
9383
    self.tasklets = [self.replacer]
9384

    
9385
  def DeclareLocks(self, level):
9386
    if level == locking.LEVEL_NODEGROUP:
9387
      assert self.op.remote_node is None
9388
      assert self.op.iallocator is not None
9389
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9390

    
9391
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9392
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9393
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9394

    
9395
    elif level == locking.LEVEL_NODE:
9396
      if self.op.iallocator is not None:
9397
        assert self.op.remote_node is None
9398
        assert not self.needed_locks[locking.LEVEL_NODE]
9399

    
9400
        # Lock member nodes of all locked groups
9401
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9402
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9403
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9404
      else:
9405
        self._LockInstancesNodes()
9406

    
9407
  def BuildHooksEnv(self):
9408
    """Build hooks env.
9409

9410
    This runs on the master, the primary and all the secondaries.
9411

9412
    """
9413
    instance = self.replacer.instance
9414
    env = {
9415
      "MODE": self.op.mode,
9416
      "NEW_SECONDARY": self.op.remote_node,
9417
      "OLD_SECONDARY": instance.secondary_nodes[0],
9418
      }
9419
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9420
    return env
9421

    
9422
  def BuildHooksNodes(self):
9423
    """Build hooks nodes.
9424

9425
    """
9426
    instance = self.replacer.instance
9427
    nl = [
9428
      self.cfg.GetMasterNode(),
9429
      instance.primary_node,
9430
      ]
9431
    if self.op.remote_node is not None:
9432
      nl.append(self.op.remote_node)
9433
    return nl, nl
9434

    
9435
  def CheckPrereq(self):
9436
    """Check prerequisites.
9437

9438
    """
9439
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9440
            self.op.iallocator is None)
9441

    
9442
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9443
    if owned_groups:
9444
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9445

    
9446
    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
9450
  """Replaces disks for an instance.
9451

9452
  Note: Locking is not within the scope of this class.
9453

9454
  """
9455
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9456
               disks, delay_iallocator, early_release):
9457
    """Initializes this class.
9458

9459
    """
9460
    Tasklet.__init__(self, lu)
9461

    
9462
    # Parameters
9463
    self.instance_name = instance_name
9464
    self.mode = mode
9465
    self.iallocator_name = iallocator_name
9466
    self.remote_node = remote_node
9467
    self.disks = disks
9468
    self.delay_iallocator = delay_iallocator
9469
    self.early_release = early_release
9470

    
9471
    # Runtime data
9472
    self.instance = None
9473
    self.new_node = None
9474
    self.target_node = None
9475
    self.other_node = None
9476
    self.remote_node_info = None
9477
    self.node_secondary_ip = None
9478

    
9479
  @staticmethod
9480
  def CheckArguments(mode, remote_node, iallocator):
9481
    """Helper function for users of this class.
9482

9483
    """
9484
    # check for valid parameter combination
9485
    if mode == constants.REPLACE_DISK_CHG:
9486
      if remote_node is None and iallocator is None:
9487
        raise errors.OpPrereqError("When changing the secondary either an"
9488
                                   " iallocator script must be used or the"
9489
                                   " new node given", errors.ECODE_INVAL)
9490

    
9491
      if remote_node is not None and iallocator is not None:
9492
        raise errors.OpPrereqError("Give either the iallocator or the new"
9493
                                   " secondary, not both", errors.ECODE_INVAL)
9494

    
9495
    elif remote_node is not None or iallocator is not None:
9496
      # Not replacing the secondary
9497
      raise errors.OpPrereqError("The iallocator and new node options can"
9498
                                 " only be used when changing the"
9499
                                 " secondary node", errors.ECODE_INVAL)
9500

    
9501
  @staticmethod
9502
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9503
    """Compute a new secondary node using an IAllocator.
9504

9505
    """
9506
    ial = IAllocator(lu.cfg, lu.rpc,
9507
                     mode=constants.IALLOCATOR_MODE_RELOC,
9508
                     name=instance_name,
9509
                     relocate_from=list(relocate_from))
9510

    
9511
    ial.Run(iallocator_name)
9512

    
9513
    if not ial.success:
9514
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9515
                                 " %s" % (iallocator_name, ial.info),
9516
                                 errors.ECODE_NORES)
9517

    
9518
    if len(ial.result) != ial.required_nodes:
9519
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9520
                                 " of nodes (%s), required %s" %
9521
                                 (iallocator_name,
9522
                                  len(ial.result), ial.required_nodes),
9523
                                 errors.ECODE_FAULT)
9524

    
9525
    remote_node_name = ial.result[0]
9526

    
9527
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9528
               instance_name, remote_node_name)
9529

    
9530
    return remote_node_name
9531

    
9532
  def _FindFaultyDisks(self, node_name):
9533
    """Wrapper for L{_FindFaultyInstanceDisks}.
9534

9535
    """
9536
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9537
                                    node_name, True)
9538

    
9539
  def _CheckDisksActivated(self, instance):
9540
    """Checks if the instance disks are activated.
9541

9542
    @param instance: The instance to check disks
9543
    @return: True if they are activated, False otherwise
9544

9545
    """
9546
    nodes = instance.all_nodes
9547

    
9548
    for idx, dev in enumerate(instance.disks):
9549
      for node in nodes:
9550
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9551
        self.cfg.SetDiskID(dev, node)
9552

    
9553
        result = self.rpc.call_blockdev_find(node, dev)
9554

    
9555
        if result.offline:
9556
          continue
9557
        elif result.fail_msg or not result.payload:
9558
          return False
9559

    
9560
    return True
9561

    
9562
  def CheckPrereq(self):
9563
    """Check prerequisites.
9564

9565
    This checks that the instance is in the cluster.
9566

9567
    """
9568
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9569
    assert instance is not None, \
9570
      "Cannot retrieve locked instance %s" % self.instance_name
9571

    
9572
    if instance.disk_template != constants.DT_DRBD8:
9573
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9574
                                 " instances", errors.ECODE_INVAL)
9575

    
9576
    if len(instance.secondary_nodes) != 1:
9577
      raise errors.OpPrereqError("The instance has a strange layout,"
9578
                                 " expected one secondary but found %d" %
9579
                                 len(instance.secondary_nodes),
9580
                                 errors.ECODE_FAULT)
9581

    
9582
    if not self.delay_iallocator:
9583
      self._CheckPrereq2()
9584

    
9585
  def _CheckPrereq2(self):
9586
    """Check prerequisites, second part.
9587

9588
    This function should always be part of CheckPrereq. It was split out so
    that, during node evacuation, it can be called from Exec instead;
    otherwise the iallocator would only see an unmodified cluster model,
    without the planned changes taken into account.
9592

9593
    """
9594
    instance = self.instance
9595
    secondary_node = instance.secondary_nodes[0]
9596

    
9597
    if self.iallocator_name is None:
9598
      remote_node = self.remote_node
9599
    else:
9600
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9601
                                       instance.name, instance.secondary_nodes)
9602

    
9603
    if remote_node is None:
9604
      self.remote_node_info = None
9605
    else:
9606
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9607
             "Remote node '%s' is not locked" % remote_node
9608

    
9609
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9610
      assert self.remote_node_info is not None, \
9611
        "Cannot retrieve locked node %s" % remote_node
9612

    
9613
    if remote_node == self.instance.primary_node:
9614
      raise errors.OpPrereqError("The specified node is the primary node of"
9615
                                 " the instance", errors.ECODE_INVAL)
9616

    
9617
    if remote_node == secondary_node:
9618
      raise errors.OpPrereqError("The specified node is already the"
9619
                                 " secondary node of the instance",
9620
                                 errors.ECODE_INVAL)
9621

    
9622
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9623
                                    constants.REPLACE_DISK_CHG):
9624
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9625
                                 errors.ECODE_INVAL)
9626

    
9627
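    # Summary of the node roles assigned by the branches below (descriptive
    # comment only, using the attributes already set in this method):
    #   REPLACE_DISK_AUTO: target_node = node holding the faulty disks,
    #                      other_node  = its healthy peer
    #   REPLACE_DISK_PRI:  target_node = primary,   other_node = secondary
    #   REPLACE_DISK_SEC:  target_node = secondary, other_node = primary
    #   REPLACE_DISK_CHG:  target_node = old secondary, other_node = primary,
    #                      new_node    = the replacement secondary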
    if self.mode == constants.REPLACE_DISK_AUTO:
9628
      if not self._CheckDisksActivated(instance):
9629
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9630
                                   " first" % self.instance_name,
9631
                                   errors.ECODE_STATE)
9632
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9633
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9634

    
9635
      if faulty_primary and faulty_secondary:
9636
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9637
                                   " one node and can not be repaired"
9638
                                   " automatically" % self.instance_name,
9639
                                   errors.ECODE_STATE)
9640

    
9641
      if faulty_primary:
9642
        self.disks = faulty_primary
9643
        self.target_node = instance.primary_node
9644
        self.other_node = secondary_node
9645
        check_nodes = [self.target_node, self.other_node]
9646
      elif faulty_secondary:
9647
        self.disks = faulty_secondary
9648
        self.target_node = secondary_node
9649
        self.other_node = instance.primary_node
9650
        check_nodes = [self.target_node, self.other_node]
9651
      else:
9652
        self.disks = []
9653
        check_nodes = []
9654

    
9655
    else:
9656
      # Non-automatic modes
9657
      if self.mode == constants.REPLACE_DISK_PRI:
9658
        self.target_node = instance.primary_node
9659
        self.other_node = secondary_node
9660
        check_nodes = [self.target_node, self.other_node]
9661

    
9662
      elif self.mode == constants.REPLACE_DISK_SEC:
9663
        self.target_node = secondary_node
9664
        self.other_node = instance.primary_node
9665
        check_nodes = [self.target_node, self.other_node]
9666

    
9667
      elif self.mode == constants.REPLACE_DISK_CHG:
9668
        self.new_node = remote_node
9669
        self.other_node = instance.primary_node
9670
        self.target_node = secondary_node
9671
        check_nodes = [self.new_node, self.other_node]
9672

    
9673
        _CheckNodeNotDrained(self.lu, remote_node)
9674
        _CheckNodeVmCapable(self.lu, remote_node)
9675

    
9676
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9677
        assert old_node_info is not None
9678
        if old_node_info.offline and not self.early_release:
9679
          # doesn't make sense to delay the release
9680
          self.early_release = True
9681
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9682
                          " early-release mode", secondary_node)
9683

    
9684
      else:
9685
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9686
                                     self.mode)
9687

    
9688
      # If not specified all disks should be replaced
9689
      if not self.disks:
9690
        self.disks = range(len(self.instance.disks))
9691

    
9692
    for node in check_nodes:
9693
      _CheckNodeOnline(self.lu, node)
9694

    
9695
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9696
                                                          self.other_node,
9697
                                                          self.target_node]
9698
                              if node_name is not None)
9699

    
9700
    # Release unneeded node locks
9701
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9702

    
9703
    # Release any owned node group
9704
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9705
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9706

    
9707
    # Check whether disks are valid
9708
    for disk_idx in self.disks:
9709
      instance.FindDisk(disk_idx)
9710

    
9711
    # Get secondary node IP addresses
9712
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9713
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9714

    
9715
  def Exec(self, feedback_fn):
9716
    """Execute disk replacement.
9717

9718
    This dispatches the disk replacement to the appropriate handler.
9719

9720
    """
9721
    if self.delay_iallocator:
9722
      self._CheckPrereq2()
9723

    
9724
    if __debug__:
9725
      # Verify owned locks before starting operation
9726
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9727
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9728
          ("Incorrect node locks, owning %s, expected %s" %
9729
           (owned_nodes, self.node_secondary_ip.keys()))
9730

    
9731
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9732
      assert list(owned_instances) == [self.instance_name], \
9733
          "Instance '%s' not locked" % self.instance_name
9734

    
9735
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9736
          "Should not own any node group lock at this point"
9737

    
9738
    if not self.disks:
9739
      feedback_fn("No disks need replacement")
9740
      return
9741

    
9742
    feedback_fn("Replacing disk(s) %s for %s" %
9743
                (utils.CommaJoin(self.disks), self.instance.name))
9744

    
9745
    activate_disks = (not self.instance.admin_up)
9746

    
9747
    # Activate the instance disks if we're replacing them on a down instance
9748
    if activate_disks:
9749
      _StartInstanceDisks(self.lu, self.instance, True)
9750

    
9751
    try:
9752
      # Should we replace the secondary node?
9753
      if self.new_node is not None:
9754
        fn = self._ExecDrbd8Secondary
9755
      else:
9756
        fn = self._ExecDrbd8DiskOnly
9757

    
9758
      result = fn(feedback_fn)
9759
    finally:
9760
      # Deactivate the instance disks if we're replacing them on a
9761
      # down instance
9762
      if activate_disks:
9763
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9764

    
9765
    if __debug__:
9766
      # Verify owned locks
9767
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9768
      nodes = frozenset(self.node_secondary_ip)
9769
      assert ((self.early_release and not owned_nodes) or
9770
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9771
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9772
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9773

    
9774
    return result
9775

    
9776
  def _CheckVolumeGroup(self, nodes):
9777
    self.lu.LogInfo("Checking volume groups")
9778

    
9779
    vgname = self.cfg.GetVGName()
9780

    
9781
    # Make sure volume group exists on all involved nodes
9782
    results = self.rpc.call_vg_list(nodes)
9783
    if not results:
9784
      raise errors.OpExecError("Can't list volume groups on the nodes")
9785

    
9786
    for node in nodes:
9787
      res = results[node]
9788
      res.Raise("Error checking node %s" % node)
9789
      if vgname not in res.payload:
9790
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9791
                                 (vgname, node))
9792

    
9793
  def _CheckDisksExistence(self, nodes):
9794
    # Check disk existence
9795
    for idx, dev in enumerate(self.instance.disks):
9796
      if idx not in self.disks:
9797
        continue
9798

    
9799
      for node in nodes:
9800
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9801
        self.cfg.SetDiskID(dev, node)
9802

    
9803
        result = self.rpc.call_blockdev_find(node, dev)
9804

    
9805
        msg = result.fail_msg
9806
        if msg or not result.payload:
9807
          if not msg:
9808
            msg = "disk not found"
9809
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9810
                                   (idx, node, msg))
9811

    
9812
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9813
    for idx, dev in enumerate(self.instance.disks):
9814
      if idx not in self.disks:
9815
        continue
9816

    
9817
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9818
                      (idx, node_name))
9819

    
9820
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9821
                                   ldisk=ldisk):
9822
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9823
                                 " replace disks for instance %s" %
9824
                                 (node_name, self.instance.name))
9825

    
9826
  def _CreateNewStorage(self, node_name):
9827
    """Create new storage on the primary or secondary node.
9828

9829
    This is only used for same-node replaces, not for changing the
9830
    secondary node, hence we don't want to modify the existing disk.
9831

9832
    """
9833
    iv_names = {}
9834

    
9835
    for idx, dev in enumerate(self.instance.disks):
9836
      if idx not in self.disks:
9837
        continue
9838

    
9839
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9840

    
9841
      self.cfg.SetDiskID(dev, node_name)
9842

    
9843
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9844
      names = _GenerateUniqueNames(self.lu, lv_names)
9845

    
9846
      vg_data = dev.children[0].logical_id[0]
9847
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9848
                             logical_id=(vg_data, names[0]))
9849
      vg_meta = dev.children[1].logical_id[0]
9850
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
9851
                             logical_id=(vg_meta, names[1]))
9852

    
9853
      new_lvs = [lv_data, lv_meta]
9854
      old_lvs = [child.Copy() for child in dev.children]
9855
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9856

    
9857
      # we pass force_create=True to force the LVM creation
9858
      for new_lv in new_lvs:
9859
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9860
                        _GetInstanceInfoText(self.instance), False)
9861

    
9862
    return iv_names
9863

    
9864
  def _CheckDevices(self, node_name, iv_names):
9865
    for name, (dev, _, _) in iv_names.iteritems():
9866
      self.cfg.SetDiskID(dev, node_name)
9867

    
9868
      result = self.rpc.call_blockdev_find(node_name, dev)
9869

    
9870
      msg = result.fail_msg
9871
      if msg or not result.payload:
9872
        if not msg:
9873
          msg = "disk not found"
9874
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9875
                                 (name, msg))
9876

    
9877
      if result.payload.is_degraded:
9878
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9879

    
9880
  def _RemoveOldStorage(self, node_name, iv_names):
9881
    for name, (_, old_lvs, _) in iv_names.iteritems():
9882
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9883

    
9884
      for lv in old_lvs:
9885
        self.cfg.SetDiskID(lv, node_name)
9886

    
9887
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9888
        if msg:
9889
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9890
                             hint="remove unused LVs manually")
9891

    
9892
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9893
    """Replace a disk on the primary or secondary for DRBD 8.
9894

9895
    The algorithm for replace is quite complicated:
9896

9897
      1. for each disk to be replaced:
9898

9899
        1. create new LVs on the target node with unique names
9900
        1. detach old LVs from the drbd device
9901
        1. rename old LVs to name_replaced.<time_t>
9902
        1. rename new LVs to old LVs
9903
        1. attach the new LVs (with the old names now) to the drbd device
9904

9905
      1. wait for sync across all devices
9906

9907
      1. for each modified disk:
9908

9909
        1. remove old LVs (which have the name name_replaced.<time_t>)
9910

9911
    Failures are not very well handled.
9912

9913
    """
9914
    steps_total = 6
9915

    
9916
    # Step: check device activation
9917
    self.lu.LogStep(1, steps_total, "Check device existence")
9918
    self._CheckDisksExistence([self.other_node, self.target_node])
9919
    self._CheckVolumeGroup([self.target_node, self.other_node])
9920

    
9921
    # Step: check other node consistency
9922
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9923
    self._CheckDisksConsistency(self.other_node,
9924
                                self.other_node == self.instance.primary_node,
9925
                                False)
9926

    
9927
    # Step: create new storage
9928
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9929
    iv_names = self._CreateNewStorage(self.target_node)
9930

    
9931
    # Step: for each lv, detach+rename*2+attach
9932
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9933
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9934
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9935

    
9936
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9937
                                                     old_lvs)
9938
      result.Raise("Can't detach drbd from local storage on node"
9939
                   " %s for device %s" % (self.target_node, dev.iv_name))
9940
      #dev.children = []
9941
      #cfg.Update(instance)
9942

    
9943
      # ok, we created the new LVs, so now we know we have the needed
9944
      # storage; as such, we proceed on the target node to rename
9945
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9946
      # using the assumption that logical_id == physical_id (which in
9947
      # turn is the unique_id on that node)
9948

    
9949
      # FIXME(iustin): use a better name for the replaced LVs
9950
      temp_suffix = int(time.time())
9951
      ren_fn = lambda d, suff: (d.physical_id[0],
9952
                                d.physical_id[1] + "_replaced-%s" % suff)
9953

    
9954
      # Build the rename list based on what LVs exist on the node
9955
      rename_old_to_new = []
9956
      for to_ren in old_lvs:
9957
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9958
        if not result.fail_msg and result.payload:
9959
          # device exists
9960
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9961

    
9962
      self.lu.LogInfo("Renaming the old LVs on the target node")
9963
      result = self.rpc.call_blockdev_rename(self.target_node,
9964
                                             rename_old_to_new)
9965
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9966

    
9967
      # Now we rename the new LVs to the old LVs
9968
      self.lu.LogInfo("Renaming the new LVs on the target node")
9969
      rename_new_to_old = [(new, old.physical_id)
9970
                           for old, new in zip(old_lvs, new_lvs)]
9971
      result = self.rpc.call_blockdev_rename(self.target_node,
9972
                                             rename_new_to_old)
9973
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9974

    
9975
      # Intermediate steps of in memory modifications
9976
      for old, new in zip(old_lvs, new_lvs):
9977
        new.logical_id = old.logical_id
9978
        self.cfg.SetDiskID(new, self.target_node)
9979

    
9980
      # We need to modify old_lvs so that removal later removes the
9981
      # right LVs, not the newly added ones; note that old_lvs is a
9982
      # copy here
9983
      for disk in old_lvs:
9984
        disk.logical_id = ren_fn(disk, temp_suffix)
9985
        self.cfg.SetDiskID(disk, self.target_node)
9986

    
9987
      # Now that the new lvs have the old name, we can add them to the device
9988
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9989
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9990
                                                  new_lvs)
9991
      msg = result.fail_msg
9992
      if msg:
9993
        for new_lv in new_lvs:
9994
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9995
                                               new_lv).fail_msg
9996
          if msg2:
9997
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9998
                               hint=("cleanup manually the unused logical"
9999
                                     "volumes"))
10000
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10001

    
10002
    cstep = 5
10003
    if self.early_release:
10004
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10005
      cstep += 1
10006
      self._RemoveOldStorage(self.target_node, iv_names)
10007
      # WARNING: we release both node locks here, do not do other RPCs
10008
      # than WaitForSync to the primary node
10009
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10010
                    names=[self.target_node, self.other_node])
10011

    
10012
    # Wait for sync
10013
    # This can fail as the old devices are degraded and _WaitForSync
10014
    # does a combined result over all disks, so we don't check its return value
10015
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10016
    cstep += 1
10017
    _WaitForSync(self.lu, self.instance)
10018

    
10019
    # Check all devices manually
10020
    self._CheckDevices(self.instance.primary_node, iv_names)
10021

    
10022
    # Step: remove old storage
10023
    if not self.early_release:
10024
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10025
      cstep += 1
10026
      self._RemoveOldStorage(self.target_node, iv_names)
10027

    
10028
  def _ExecDrbd8Secondary(self, feedback_fn):
10029
    """Replace the secondary node for DRBD 8.
10030

10031
    The algorithm for replace is quite complicated:
10032
      - for all disks of the instance:
10033
        - create new LVs on the new node with same names
10034
        - shutdown the drbd device on the old secondary
10035
        - disconnect the drbd network on the primary
10036
        - create the drbd device on the new secondary
10037
        - network attach the drbd on the primary, using an artifice:
10038
          the drbd code for Attach() will connect to the network if it
10039
          finds a device which is connected to the good local disks but
10040
          not network enabled
10041
      - wait for sync across all devices
10042
      - remove all disks from the old secondary
10043

10044
    Failures are not very well handled.
10045

10046
    """
10047
    steps_total = 6
10048

    
10049
    pnode = self.instance.primary_node
10050

    
10051
    # Step: check device activation
10052
    self.lu.LogStep(1, steps_total, "Check device existence")
10053
    self._CheckDisksExistence([self.instance.primary_node])
10054
    self._CheckVolumeGroup([self.instance.primary_node])
10055

    
10056
    # Step: check other node consistency
10057
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10058
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
10059

    
10060
    # Step: create new storage
10061
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10062
    for idx, dev in enumerate(self.instance.disks):
10063
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10064
                      (self.new_node, idx))
10065
      # we pass force_create=True to force LVM creation
10066
      for new_lv in dev.children:
10067
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10068
                        _GetInstanceInfoText(self.instance), False)
10069

    
10070
    # Step 4: drbd minors and drbd setup changes
10071
    # after this, we must manually remove the drbd minors on both the
10072
    # error and the success paths
10073
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10074
    minors = self.cfg.AllocateDRBDMinor([self.new_node
10075
                                         for dev in self.instance.disks],
10076
                                        self.instance.name)
10077
    logging.debug("Allocated minors %r", minors)
10078

    
10079
    iv_names = {}
10080
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10081
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10082
                      (self.new_node, idx))
10083
      # create new devices on new_node; note that we create two IDs:
10084
      # one without port, so the drbd will be activated without
10085
      # networking information on the new node at this stage, and one
10086
      # with network, for the latter activation in step 4
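      # Illustrative layout of a DRBD8 logical_id 6-tuple (values made up):
      #   (node_A, node_B, port, minor_A, minor_B, secret)
      #   e.g. ("node1", "node2", 11000, 0, 1, "s3cr3t")
      # new_alone_id below uses None instead of the port so the device comes
      # up standalone; new_net_id keeps the port for the later network attach.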
10087
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10088
      if self.instance.primary_node == o_node1:
10089
        p_minor = o_minor1
10090
      else:
10091
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10092
        p_minor = o_minor2
10093

    
10094
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10095
                      p_minor, new_minor, o_secret)
10096
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10097
                    p_minor, new_minor, o_secret)
10098

    
10099
      iv_names[idx] = (dev, dev.children, new_net_id)
10100
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10101
                    new_net_id)
10102
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10103
                              logical_id=new_alone_id,
10104
                              children=dev.children,
10105
                              size=dev.size)
10106
      try:
10107
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10108
                              _GetInstanceInfoText(self.instance), False)
10109
      except errors.GenericError:
10110
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10111
        raise
10112

    
10113
    # We have new devices, shutdown the drbd on the old secondary
10114
    for idx, dev in enumerate(self.instance.disks):
10115
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10116
      self.cfg.SetDiskID(dev, self.target_node)
10117
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10118
      if msg:
10119
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10120
                           "node: %s" % (idx, msg),
10121
                           hint=("Please cleanup this device manually as"
10122
                                 " soon as possible"))
10123

    
10124
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10125
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10126
                                               self.instance.disks)[pnode]
10127

    
10128
    msg = result.fail_msg
10129
    if msg:
10130
      # detaches didn't succeed (unlikely)
10131
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10132
      raise errors.OpExecError("Can't detach the disks from the network on"
10133
                               " old node: %s" % (msg,))
10134

    
10135
    # if we managed to detach at least one, we update all the disks of
10136
    # the instance to point to the new secondary
10137
    self.lu.LogInfo("Updating instance configuration")
10138
    for dev, _, new_logical_id in iv_names.itervalues():
10139
      dev.logical_id = new_logical_id
10140
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10141

    
10142
    self.cfg.Update(self.instance, feedback_fn)
10143

    
10144
    # and now perform the drbd attach
10145
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10146
                    " (standalone => connected)")
10147
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10148
                                            self.new_node],
10149
                                           self.node_secondary_ip,
10150
                                           self.instance.disks,
10151
                                           self.instance.name,
10152
                                           False)
10153
    for to_node, to_result in result.items():
10154
      msg = to_result.fail_msg
10155
      if msg:
10156
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10157
                           to_node, msg,
10158
                           hint=("please do a gnt-instance info to see the"
10159
                                 " status of disks"))
10160
    cstep = 5
10161
    if self.early_release:
10162
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10163
      cstep += 1
10164
      self._RemoveOldStorage(self.target_node, iv_names)
10165
      # WARNING: we release all node locks here, do not do other RPCs
10166
      # than WaitForSync to the primary node
10167
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10168
                    names=[self.instance.primary_node,
10169
                           self.target_node,
10170
                           self.new_node])
10171

    
10172
    # Wait for sync
10173
    # This can fail as the old devices are degraded and _WaitForSync
10174
    # does a combined result over all disks, so we don't check its return value
10175
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10176
    cstep += 1
10177
    _WaitForSync(self.lu, self.instance)
10178

    
10179
    # Check all devices manually
10180
    self._CheckDevices(self.instance.primary_node, iv_names)
10181

    
10182
    # Step: remove old storage
10183
    if not self.early_release:
10184
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10185
      self._RemoveOldStorage(self.target_node, iv_names)
10186

    
10187

    
10188
class LURepairNodeStorage(NoHooksLU):
10189
  """Repairs the volume group on a node.
10190

10191
  """
10192
  REQ_BGL = False
10193

    
10194
  def CheckArguments(self):
10195
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10196

    
10197
    storage_type = self.op.storage_type
10198

    
10199
    if (constants.SO_FIX_CONSISTENCY not in
10200
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10201
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10202
                                 " repaired" % storage_type,
10203
                                 errors.ECODE_INVAL)
10204

    
10205
  def ExpandNames(self):
10206
    self.needed_locks = {
10207
      locking.LEVEL_NODE: [self.op.node_name],
10208
      }
10209

    
10210
  def _CheckFaultyDisks(self, instance, node_name):
10211
    """Ensure faulty disks abort the opcode or at least warn."""
10212
    try:
10213
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10214
                                  node_name, True):
10215
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10216
                                   " node '%s'" % (instance.name, node_name),
10217
                                   errors.ECODE_STATE)
10218
    except errors.OpPrereqError, err:
10219
      if self.op.ignore_consistency:
10220
        self.proc.LogWarning(str(err.args[0]))
10221
      else:
10222
        raise
10223

    
10224
  def CheckPrereq(self):
10225
    """Check prerequisites.
10226

10227
    """
10228
    # Check whether any instance on this node has faulty disks
10229
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10230
      if not inst.admin_up:
10231
        continue
10232
      check_nodes = set(inst.all_nodes)
10233
      check_nodes.discard(self.op.node_name)
10234
      for inst_node_name in check_nodes:
10235
        self._CheckFaultyDisks(inst, inst_node_name)
10236

    
10237
  def Exec(self, feedback_fn):
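    # Typically reached via something like
    #   gnt-node repair-storage node1.example.com lvm-vg xenvg
    # (illustrative invocation only; the actual values arrive via the opcode).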
10238
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10239
                (self.op.name, self.op.node_name))
10240

    
10241
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10242
    result = self.rpc.call_storage_execute(self.op.node_name,
10243
                                           self.op.storage_type, st_args,
10244
                                           self.op.name,
10245
                                           constants.SO_FIX_CONSISTENCY)
10246
    result.Raise("Failed to repair storage unit '%s' on %s" %
10247
                 (self.op.name, self.op.node_name))
10248

    
10249

    
10250
class LUNodeEvacuate(NoHooksLU):
10251
  """Evacuates instances off a list of nodes.
10252

10253
  """
10254
  REQ_BGL = False
10255

    
10256
  def CheckArguments(self):
10257
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10258

    
10259
  def ExpandNames(self):
10260
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10261

    
10262
    if self.op.remote_node is not None:
10263
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10264
      assert self.op.remote_node
10265

    
10266
      if self.op.remote_node == self.op.node_name:
10267
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10268
                                   " secondary node", errors.ECODE_INVAL)
10269

    
10270
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10271
        raise errors.OpPrereqError("Without the use of an iallocator only"
10272
                                   " secondary instances can be evacuated",
10273
                                   errors.ECODE_INVAL)
10274

    
10275
    # Declare locks
10276
    self.share_locks = _ShareAll()
10277
    self.needed_locks = {
10278
      locking.LEVEL_INSTANCE: [],
10279
      locking.LEVEL_NODEGROUP: [],
10280
      locking.LEVEL_NODE: [],
10281
      }
10282

    
10283
    if self.op.remote_node is None:
10284
      # Iallocator will choose any node(s) in the same group
10285
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10286
    else:
10287
      group_nodes = frozenset([self.op.remote_node])
10288

    
10289
    # Determine nodes to be locked
10290
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10291

    
10292
  def _DetermineInstances(self):
10293
    """Builds list of instances to operate on.
10294

10295
    """
10296
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10297

    
10298
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10299
      # Primary instances only
10300
      inst_fn = _GetNodePrimaryInstances
10301
      assert self.op.remote_node is None, \
10302
        "Evacuating primary instances requires iallocator"
10303
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10304
      # Secondary instances only
10305
      inst_fn = _GetNodeSecondaryInstances
10306
    else:
10307
      # All instances
10308
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10309
      inst_fn = _GetNodeInstances
10310

    
10311
    return inst_fn(self.cfg, self.op.node_name)
10312

    
10313
  def DeclareLocks(self, level):
10314
    if level == locking.LEVEL_INSTANCE:
10315
      # Lock instances optimistically, needs verification once node and group
10316
      # locks have been acquired
10317
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10318
        set(i.name for i in self._DetermineInstances())
10319

    
10320
    elif level == locking.LEVEL_NODEGROUP:
10321
      # Lock node groups optimistically, needs verification once nodes have
10322
      # been acquired
10323
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10324
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10325

    
10326
    elif level == locking.LEVEL_NODE:
10327
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10328

    
10329
  def CheckPrereq(self):
10330
    # Verify locks
10331
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10332
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10333
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10334

    
10335
    assert owned_nodes == self.lock_nodes
10336

    
10337
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10338
    if owned_groups != wanted_groups:
10339
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10340
                               " current groups are '%s', used to be '%s'" %
10341
                               (utils.CommaJoin(wanted_groups),
10342
                                utils.CommaJoin(owned_groups)))
10343

    
10344
    # Determine affected instances
10345
    self.instances = self._DetermineInstances()
10346
    self.instance_names = [i.name for i in self.instances]
10347

    
10348
    if set(self.instance_names) != owned_instances:
10349
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10350
                               " were acquired, current instances are '%s',"
10351
                               " used to be '%s'" %
10352
                               (self.op.node_name,
10353
                                utils.CommaJoin(self.instance_names),
10354
                                utils.CommaJoin(owned_instances)))
10355

    
10356
    if self.instance_names:
10357
      self.LogInfo("Evacuating instances from node '%s': %s",
10358
                   self.op.node_name,
10359
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10360
    else:
10361
      self.LogInfo("No instances to evacuate from node '%s'",
10362
                   self.op.node_name)
10363

    
10364
    if self.op.remote_node is not None:
10365
      for i in self.instances:
10366
        if i.primary_node == self.op.remote_node:
10367
          raise errors.OpPrereqError("Node %s is the primary node of"
10368
                                     " instance %s, cannot use it as"
10369
                                     " secondary" %
10370
                                     (self.op.remote_node, i.name),
10371
                                     errors.ECODE_INVAL)
10372

    
10373
  def Exec(self, feedback_fn):
10374
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10375

    
10376
    if not self.instance_names:
10377
      # No instances to evacuate
10378
      jobs = []
10379

    
10380
    elif self.op.iallocator is not None:
10381
      # TODO: Implement relocation to other group
10382
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10383
                       evac_mode=self.op.mode,
10384
                       instances=list(self.instance_names))
10385

    
10386
      ial.Run(self.op.iallocator)
10387

    
10388
      if not ial.success:
10389
        raise errors.OpPrereqError("Can't compute node evacuation using"
10390
                                   " iallocator '%s': %s" %
10391
                                   (self.op.iallocator, ial.info),
10392
                                   errors.ECODE_NORES)
10393

    
10394
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10395

    
10396
    elif self.op.remote_node is not None:
10397
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
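      # One single-opcode job per instance: the outer list is the list of
      # jobs, each inner list the opcodes of one job (submitted later via
      # ResultWithJobs).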
10398
      jobs = [
10399
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10400
                                        remote_node=self.op.remote_node,
10401
                                        disks=[],
10402
                                        mode=constants.REPLACE_DISK_CHG,
10403
                                        early_release=self.op.early_release)]
10404
        for instance_name in self.instance_names
10405
        ]
10406

    
10407
    else:
10408
      raise errors.ProgrammerError("No iallocator or remote node")
10409

    
10410
    return ResultWithJobs(jobs)
10411

    
10412

    
10413
def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group
10433

    
10434

    
10435
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10436
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10437

10438
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10439
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10440

10441
  @type lu: L{LogicalUnit}
10442
  @param lu: Logical unit instance
10443
  @type alloc_result: tuple/list
10444
  @param alloc_result: Result from iallocator
10445
  @type early_release: bool
10446
  @param early_release: Whether to release locks early if possible
10447
  @type use_nodes: bool
10448
  @param use_nodes: Whether to display node names instead of groups
10449

10450
  """
10451
  (moved, failed, jobs) = alloc_result
10452

    
10453
  if failed:
10454
    lu.LogWarning("Unable to evacuate instances %s",
10455
                  utils.CommaJoin("%s (%s)" % (name, reason)
10456
                                  for (name, reason) in failed))
10457

    
10458
  if moved:
10459
    lu.LogInfo("Instances to be moved: %s",
10460
               utils.CommaJoin("%s (to %s)" %
10461
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10462
                               for (name, group, nodes) in moved))
10463

    
10464
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10465
              map(opcodes.OpCode.LoadOpCode, ops))
10466
          for ops in jobs]
10467

    
10468

    
10469
class LUInstanceGrowDisk(LogicalUnit):
10470
  """Grow a disk of an instance.
10471

10472
  """
10473
  HPATH = "disk-grow"
10474
  HTYPE = constants.HTYPE_INSTANCE
10475
  REQ_BGL = False
10476

    
10477
  def ExpandNames(self):
10478
    self._ExpandAndLockInstance()
10479
    self.needed_locks[locking.LEVEL_NODE] = []
10480
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10481

    
10482
  def DeclareLocks(self, level):
10483
    if level == locking.LEVEL_NODE:
10484
      self._LockInstancesNodes()
10485

    
10486
  def BuildHooksEnv(self):
10487
    """Build hooks env.
10488

10489
    This runs on the master, the primary and all the secondaries.
10490

10491
    """
10492
    env = {
10493
      "DISK": self.op.disk,
10494
      "AMOUNT": self.op.amount,
10495
      }
10496
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10497
    return env
10498

    
10499
  def BuildHooksNodes(self):
10500
    """Build hooks nodes.
10501

10502
    """
10503
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10504
    return (nl, nl)
10505

    
10506
  def CheckPrereq(self):
10507
    """Check prerequisites.
10508

10509
    This checks that the instance is in the cluster.
10510

10511
    """
10512
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10513
    assert instance is not None, \
10514
      "Cannot retrieve locked instance %s" % self.op.instance_name
10515
    nodenames = list(instance.all_nodes)
10516
    for node in nodenames:
10517
      _CheckNodeOnline(self, node)
10518

    
10519
    self.instance = instance
10520

    
10521
    if instance.disk_template not in constants.DTS_GROWABLE:
10522
      raise errors.OpPrereqError("Instance's disk layout does not support"
10523
                                 " growing", errors.ECODE_INVAL)
10524

    
10525
    self.disk = instance.FindDisk(self.op.disk)
10526

    
10527
    if instance.disk_template not in (constants.DT_FILE,
10528
                                      constants.DT_SHARED_FILE):
10529
      # TODO: check the free disk space for file, when that feature will be
10530
      # supported
10531
      _CheckNodesFreeDiskPerVG(self, nodenames,
10532
                               self.disk.ComputeGrowth(self.op.amount))
10533

    
10534
  def Exec(self, feedback_fn):
10535
    """Execute disk grow.
10536

10537
    """
10538
    instance = self.instance
10539
    disk = self.disk
10540

    
10541
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10542
    if not disks_ok:
10543
      raise errors.OpExecError("Cannot activate block device to grow")
10544

    
10545
    # First run all grow ops in dry-run mode
10546
    for node in instance.all_nodes:
10547
      self.cfg.SetDiskID(disk, node)
10548
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10549
      result.Raise("Grow request failed to node %s" % node)
10550

    
10551
    # We know that (as far as we can test) operations across different
10552
    # nodes will succeed, time to run it for real
10553
    for node in instance.all_nodes:
10554
      self.cfg.SetDiskID(disk, node)
10555
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10556
      result.Raise("Grow request failed to node %s" % node)
10557

    
10558
      # TODO: Rewrite code to work properly
10559
      # DRBD goes into sync mode for a short amount of time after executing the
10560
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10561
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10562
      # time is a work-around.
10563
      time.sleep(5)
10564

    
10565
    disk.RecordGrow(self.op.amount)
10566
    self.cfg.Update(instance, feedback_fn)
10567
    if self.op.wait_for_sync:
10568
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10569
      if disk_abort:
10570
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10571
                             " status; please check the instance")
10572
      if not instance.admin_up:
10573
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10574
    elif not instance.admin_up:
10575
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10576
                           " not supposed to be running because no wait for"
10577
                           " sync mode was requested")
10578

    
10579

    
10580
class LUInstanceQueryData(NoHooksLU):
10581
  """Query runtime instance data.
10582

10583
  """
10584
  REQ_BGL = False
10585

    
10586
  def ExpandNames(self):
10587
    self.needed_locks = {}
10588

    
10589
    # Use locking if requested or when non-static information is wanted
10590
    if not (self.op.static or self.op.use_locking):
10591
      self.LogWarning("Non-static data requested, locks need to be acquired")
10592
      self.op.use_locking = True
10593

    
10594
    if self.op.instances or not self.op.use_locking:
10595
      # Expand instance names right here
10596
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10597
    else:
10598
      # Will use acquired locks
10599
      self.wanted_names = None
10600

    
10601
    if self.op.use_locking:
10602
      self.share_locks = _ShareAll()
10603

    
10604
      if self.wanted_names is None:
10605
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10606
      else:
10607
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10608

    
10609
      self.needed_locks[locking.LEVEL_NODE] = []
10610
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10611

    
10612
  def DeclareLocks(self, level):
10613
    if self.op.use_locking and level == locking.LEVEL_NODE:
10614
      self._LockInstancesNodes()
10615

    
10616
  def CheckPrereq(self):
10617
    """Check prerequisites.
10618

10619
    This only checks the optional instance list against the existing names.
10620

10621
    """
10622
    if self.wanted_names is None:
10623
      assert self.op.use_locking, "Locking was not used"
10624
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10625

    
10626
    self.wanted_instances = \
10627
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10628

    
10629
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10630
    """Returns the status of a block device
10631

10632
    """
10633
    if self.op.static or not node:
10634
      return None
10635

    
10636
    self.cfg.SetDiskID(dev, node)
10637

    
10638
    result = self.rpc.call_blockdev_find(node, dev)
10639
    if result.offline:
10640
      return None
10641

    
10642
    result.Raise("Can't compute disk status for %s" % instance_name)
10643

    
10644
    status = result.payload
10645
    if status is None:
10646
      return None
10647

    
10648
    return (status.dev_path, status.major, status.minor,
10649
            status.sync_percent, status.estimated_time,
10650
            status.is_degraded, status.ldisk_status)
10651

    
10652
  def _ComputeDiskStatus(self, instance, snode, dev):
10653
    """Compute block device status.
10654

10655
    """
10656
    if dev.dev_type in constants.LDS_DRBD:
10657
      # we change the snode then (otherwise we use the one passed in)
10658
      if dev.logical_id[0] == instance.primary_node:
10659
        snode = dev.logical_id[1]
10660
      else:
10661
        snode = dev.logical_id[0]
10662

    
10663
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10664
                                              instance.name, dev)
10665
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10666

    
10667
    if dev.children:
10668
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10669
                                        instance, snode),
10670
                         dev.children)
10671
    else:
10672
      dev_children = []
10673

    
10674
    return {
10675
      "iv_name": dev.iv_name,
10676
      "dev_type": dev.dev_type,
10677
      "logical_id": dev.logical_id,
10678
      "physical_id": dev.physical_id,
10679
      "pstatus": dev_pstatus,
10680
      "sstatus": dev_sstatus,
10681
      "children": dev_children,
10682
      "mode": dev.mode,
10683
      "size": dev.size,
10684
      }
10685

    
10686
  def Exec(self, feedback_fn):
10687
    """Gather and return data"""
10688
    result = {}
10689

    
10690
    cluster = self.cfg.GetClusterInfo()
10691

    
10692
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10693
                                          for i in self.wanted_instances)
10694
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10695
      if self.op.static or pnode.offline:
10696
        remote_state = None
10697
        if pnode.offline:
10698
          self.LogWarning("Primary node %s is marked offline, returning static"
10699
                          " information only for instance %s" %
10700
                          (pnode.name, instance.name))
10701
      else:
10702
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10703
                                                  instance.name,
10704
                                                  instance.hypervisor)
10705
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10706
        remote_info = remote_info.payload
10707
        if remote_info and "state" in remote_info:
10708
          remote_state = "up"
10709
        else:
10710
          remote_state = "down"
10711

    
10712
      if instance.admin_up:
10713
        config_state = "up"
10714
      else:
10715
        config_state = "down"
10716

    
10717
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10718
                  instance.disks)
10719

    
10720
      result[instance.name] = {
10721
        "name": instance.name,
10722
        "config_state": config_state,
10723
        "run_state": remote_state,
10724
        "pnode": instance.primary_node,
10725
        "snodes": instance.secondary_nodes,
10726
        "os": instance.os,
10727
        # this happens to be the same format used for hooks
10728
        "nics": _NICListToTuple(self, instance.nics),
10729
        "disk_template": instance.disk_template,
10730
        "disks": disks,
10731
        "hypervisor": instance.hypervisor,
10732
        "network_port": instance.network_port,
10733
        "hv_instance": instance.hvparams,
10734
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10735
        "be_instance": instance.beparams,
10736
        "be_actual": cluster.FillBE(instance),
10737
        "os_instance": instance.osparams,
10738
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10739
        "serial_no": instance.serial_no,
10740
        "mtime": instance.mtime,
10741
        "ctime": instance.ctime,
10742
        "uuid": instance.uuid,
10743
        }
10744

    
10745
    return result
10746

    
10747

    
10748
class LUInstanceSetParams(LogicalUnit):
10749
  """Modifies an instances's parameters.
10750

10751
  """
10752
  HPATH = "instance-modify"
10753
  HTYPE = constants.HTYPE_INSTANCE
10754
  REQ_BGL = False
10755

    
10756
  def CheckArguments(self):
10757
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10758
            self.op.hvparams or self.op.beparams or self.op.os_name):
10759
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10760

    
10761
    if self.op.hvparams:
10762
      _CheckGlobalHvParams(self.op.hvparams)
10763

    
10764
    # Disk validation
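    # self.op.disks is a list of (operation, parameters) pairs; illustrative
    # examples only, using the constants referenced below:
    #   [(constants.DDM_ADD, {constants.IDISK_SIZE: 1024,
    #                         constants.IDISK_MODE: constants.DISK_RDWR})]
    #   [(constants.DDM_REMOVE, {})]
    #   [(0, {constants.IDISK_MODE: constants.DISK_RDWR})]  # modify disk 0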
10765
    disk_addremove = 0
10766
    for disk_op, disk_dict in self.op.disks:
10767
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10768
      if disk_op == constants.DDM_REMOVE:
10769
        disk_addremove += 1
10770
        continue
10771
      elif disk_op == constants.DDM_ADD:
10772
        disk_addremove += 1
10773
      else:
10774
        if not isinstance(disk_op, int):
10775
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10776
        if not isinstance(disk_dict, dict):
10777
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10778
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10779

    
10780
      if disk_op == constants.DDM_ADD:
10781
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10782
        if mode not in constants.DISK_ACCESS_SET:
10783
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10784
                                     errors.ECODE_INVAL)
10785
        size = disk_dict.get(constants.IDISK_SIZE, None)
10786
        if size is None:
10787
          raise errors.OpPrereqError("Required disk parameter size missing",
10788
                                     errors.ECODE_INVAL)
10789
        try:
10790
          size = int(size)
10791
        except (TypeError, ValueError), err:
10792
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10793
                                     str(err), errors.ECODE_INVAL)
10794
        disk_dict[constants.IDISK_SIZE] = size
10795
      else:
10796
        # modification of disk
10797
        if constants.IDISK_SIZE in disk_dict:
10798
          raise errors.OpPrereqError("Disk size change not possible, use"
10799
                                     " grow-disk", errors.ECODE_INVAL)
10800

    
10801
    if disk_addremove > 1:
10802
      raise errors.OpPrereqError("Only one disk add or remove operation"
10803
                                 " supported at a time", errors.ECODE_INVAL)
10804

    
10805
    if self.op.disks and self.op.disk_template is not None:
10806
      raise errors.OpPrereqError("Disk template conversion and other disk"
10807
                                 " changes not supported at the same time",
10808
                                 errors.ECODE_INVAL)
10809

    
10810
    if (self.op.disk_template and
10811
        self.op.disk_template in constants.DTS_INT_MIRROR and
10812
        self.op.remote_node is None):
10813
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10814
                                 " one requires specifying a secondary node",
10815
                                 errors.ECODE_INVAL)
10816

    
10817
    # NIC validation
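    # self.op.nics follows the same (operation, parameters) convention;
    # illustrative example only:
    #   [(constants.DDM_ADD, {constants.INIC_IP: "198.51.100.10",
    #                         constants.INIC_MAC: constants.VALUE_AUTO})]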
10818
    nic_addremove = 0
10819
    for nic_op, nic_dict in self.op.nics:
10820
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10821
      if nic_op == constants.DDM_REMOVE:
10822
        nic_addremove += 1
10823
        continue
10824
      elif nic_op == constants.DDM_ADD:
10825
        nic_addremove += 1
10826
      else:
10827
        if not isinstance(nic_op, int):
10828
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10829
        if not isinstance(nic_dict, dict):
10830
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10831
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10832

    
10833
      # nic_dict should be a dict
10834
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10835
      if nic_ip is not None:
10836
        if nic_ip.lower() == constants.VALUE_NONE:
10837
          nic_dict[constants.INIC_IP] = None
10838
        else:
10839
          if not netutils.IPAddress.IsValid(nic_ip):
10840
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10841
                                       errors.ECODE_INVAL)
10842

    
10843
      nic_bridge = nic_dict.get("bridge", None)
10844
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10845
      if nic_bridge and nic_link:
10846
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10847
                                   " at the same time", errors.ECODE_INVAL)
10848
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10849
        nic_dict["bridge"] = None
10850
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10851
        nic_dict[constants.INIC_LINK] = None
10852

    
10853
      if nic_op == constants.DDM_ADD:
10854
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10855
        if nic_mac is None:
10856
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10857

    
10858
      if constants.INIC_MAC in nic_dict:
10859
        nic_mac = nic_dict[constants.INIC_MAC]
10860
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10861
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10862

    
10863
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10864
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10865
                                     " modifying an existing nic",
10866
                                     errors.ECODE_INVAL)
10867

    
10868
    if nic_addremove > 1:
10869
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10870
                                 " supported at a time", errors.ECODE_INVAL)
10871

    
10872
  def ExpandNames(self):
10873
    self._ExpandAndLockInstance()
10874
    self.needed_locks[locking.LEVEL_NODE] = []
10875
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10876

    
10877
  def DeclareLocks(self, level):
10878
    if level == locking.LEVEL_NODE:
10879
      self._LockInstancesNodes()
10880
      if self.op.disk_template and self.op.remote_node:
10881
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10882
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10883

    
10884
  def BuildHooksEnv(self):
10885
    """Build hooks env.
10886

10887
    This runs on the master, primary and secondaries.
10888

10889
    """
10890
    args = dict()
10891
    if constants.BE_MEMORY in self.be_new:
10892
      args["memory"] = self.be_new[constants.BE_MEMORY]
10893
    if constants.BE_VCPUS in self.be_new:
10894
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10895
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10896
    # information at all.
10897
    if self.op.nics:
10898
      args["nics"] = []
10899
      nic_override = dict(self.op.nics)
10900
      for idx, nic in enumerate(self.instance.nics):
10901
        if idx in nic_override:
10902
          this_nic_override = nic_override[idx]
10903
        else:
10904
          this_nic_override = {}
10905
        if constants.INIC_IP in this_nic_override:
10906
          ip = this_nic_override[constants.INIC_IP]
10907
        else:
10908
          ip = nic.ip
10909
        if constants.INIC_MAC in this_nic_override:
10910
          mac = this_nic_override[constants.INIC_MAC]
10911
        else:
10912
          mac = nic.mac
10913
        if idx in self.nic_pnew:
10914
          nicparams = self.nic_pnew[idx]
10915
        else:
10916
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10917
        mode = nicparams[constants.NIC_MODE]
10918
        link = nicparams[constants.NIC_LINK]
10919
        args["nics"].append((ip, mac, mode, link))
10920
      if constants.DDM_ADD in nic_override:
10921
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10922
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10923
        nicparams = self.nic_pnew[constants.DDM_ADD]
10924
        mode = nicparams[constants.NIC_MODE]
10925
        link = nicparams[constants.NIC_LINK]
10926
        args["nics"].append((ip, mac, mode, link))
10927
      elif constants.DDM_REMOVE in nic_override:
10928
        del args["nics"][-1]
10929

    
10930
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10931
    if self.op.disk_template:
10932
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10933

    
10934
    return env
10935

    
10936
  def BuildHooksNodes(self):
10937
    """Build hooks nodes.
10938

10939
    """
10940
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10941
    return (nl, nl)
10942

    
10943
  def CheckPrereq(self):
10944
    """Check prerequisites.
10945

10946
    This checks the requested parameter changes against the current
    instance and cluster configuration.
10947

10948
    """
10949
    # checking the new params on the primary/secondary nodes
10950

    
10951
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10952
    cluster = self.cluster = self.cfg.GetClusterInfo()
10953
    assert self.instance is not None, \
10954
      "Cannot retrieve locked instance %s" % self.op.instance_name
10955
    pnode = instance.primary_node
10956
    nodelist = list(instance.all_nodes)
10957

    
10958
    # OS change
10959
    if self.op.os_name and not self.op.force:
10960
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10961
                      self.op.force_variant)
10962
      instance_os = self.op.os_name
10963
    else:
10964
      instance_os = instance.os
10965

    
10966
    if self.op.disk_template:
10967
      if instance.disk_template == self.op.disk_template:
10968
        raise errors.OpPrereqError("Instance already has disk template %s" %
10969
                                   instance.disk_template, errors.ECODE_INVAL)
10970

    
10971
      if (instance.disk_template,
10972
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10973
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10974
                                   " %s to %s" % (instance.disk_template,
10975
                                                  self.op.disk_template),
10976
                                   errors.ECODE_INVAL)
10977
      _CheckInstanceDown(self, instance, "cannot change disk template")
10978
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10979
        if self.op.remote_node == pnode:
10980
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10981
                                     " as the primary node of the instance" %
10982
                                     self.op.remote_node, errors.ECODE_STATE)
10983
        _CheckNodeOnline(self, self.op.remote_node)
10984
        _CheckNodeNotDrained(self, self.op.remote_node)
10985
        # FIXME: here we assume that the old instance type is DT_PLAIN
10986
        assert instance.disk_template == constants.DT_PLAIN
10987
        disks = [{constants.IDISK_SIZE: d.size,
10988
                  constants.IDISK_VG: d.logical_id[0]}
10989
                 for d in instance.disks]
10990
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10991
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10992

    
10993
    # hvparams processing
10994
    if self.op.hvparams:
10995
      hv_type = instance.hypervisor
10996
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10997
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10998
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10999

    
11000
      # local check
11001
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11002
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11003
      self.hv_proposed = self.hv_new = hv_new # the new actual values
11004
      self.hv_inst = i_hvdict # the new dict (without defaults)
11005
    else:
11006
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11007
                                              instance.hvparams)
11008
      self.hv_new = self.hv_inst = {}
11009

    
11010
    # beparams processing
11011
    if self.op.beparams:
11012
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11013
                                   use_none=True)
11014
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11015
      be_new = cluster.SimpleFillBE(i_bedict)
11016
      self.be_proposed = self.be_new = be_new # the new actual values
11017
      self.be_inst = i_bedict # the new dict (without defaults)
11018
    else:
11019
      self.be_new = self.be_inst = {}
11020
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11021
    be_old = cluster.FillBE(instance)
11022

    
11023
    # CPU param validation -- checking every time a parameter is
11024
    # changed to cover all cases where either CPU mask or vcpus have
11025
    # changed
11026
    if (constants.BE_VCPUS in self.be_proposed and
11027
        constants.HV_CPU_MASK in self.hv_proposed):
11028
      cpu_list = \
11029
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11030
      # Verify mask is consistent with number of vCPUs. Can skip this
11031
      # test if only 1 entry in the CPU mask, which means same mask
11032
      # is applied to all vCPUs.
11033
      if (len(cpu_list) > 1 and
11034
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11035
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11036
                                   " CPU mask [%s]" %
11037
                                   (self.be_proposed[constants.BE_VCPUS],
11038
                                    self.hv_proposed[constants.HV_CPU_MASK]),
11039
                                   errors.ECODE_INVAL)
11040

    
11041
      # Only perform this test if a new CPU mask is given
11042
      if constants.HV_CPU_MASK in self.hv_new:
11043
        # Calculate the largest CPU number requested
11044
        max_requested_cpu = max(map(max, cpu_list))
11045
        # Check that all of the instance's nodes have enough physical CPUs to
11046
        # satisfy the requested CPU mask
11047
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11048
                                max_requested_cpu + 1, instance.hypervisor)
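        # Worked example (illustrative values, assuming the usual
        # colon-separated multi-mask syntax): a mask of "0-1:3:3" yields three
        # per-vCPU entries, so it is only accepted with BE_VCPUS == 3;
        # max_requested_cpu is then 3 and every instance node must expose at
        # least 4 physical CPUs.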
11049

    
11050
    # osparams processing
11051
    if self.op.osparams:
11052
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11053
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11054
      self.os_inst = i_osdict # the new dict (without defaults)
11055
    else:
11056
      self.os_inst = {}
11057

    
11058
    self.warn = []
11059

    
11060
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11061
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11062
      mem_check_list = [pnode]
11063
      if be_new[constants.BE_AUTO_BALANCE]:
11064
        # either we changed auto_balance to yes or it was from before
11065
        mem_check_list.extend(instance.secondary_nodes)
11066
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11067
                                                  instance.hypervisor)
11068
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11069
                                         instance.hypervisor)
11070
      pninfo = nodeinfo[pnode]
11071
      msg = pninfo.fail_msg
11072
      if msg:
11073
        # Assume the primary node is unreachable and go ahead
11074
        self.warn.append("Can't get info from primary node %s: %s" %
11075
                         (pnode, msg))
11076
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
11077
        self.warn.append("Node data from primary node %s doesn't contain"
11078
                         " free memory information" % pnode)
11079
      elif instance_info.fail_msg:
11080
        self.warn.append("Can't get instance runtime information: %s" %
11081
                        instance_info.fail_msg)
11082
      else:
11083
        if instance_info.payload:
11084
          current_mem = int(instance_info.payload["memory"])
11085
        else:
11086
          # Assume instance not running
11087
          # (there is a slight race condition here, but it's not very probable,
11088
          # and we have no other way to check)
11089
          current_mem = 0
11090
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11091
                    pninfo.payload["memory_free"])
11092
        if miss_mem > 0:
11093
          raise errors.OpPrereqError("This change will prevent the instance"
11094
                                     " from starting, due to %d MB of memory"
11095
                                     " missing on its primary node" % miss_mem,
11096
                                     errors.ECODE_NORES)
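          # Worked example (illustrative numbers): raising BE_MEMORY to 4096
          # while the instance currently uses 1024 and the primary node
          # reports 2048 MB free gives miss_mem = 4096 - 1024 - 2048 = 1024,
          # which is > 0 and therefore rejected.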
11097

    
11098
      if be_new[constants.BE_AUTO_BALANCE]:
11099
        for node, nres in nodeinfo.items():
11100
          if node not in instance.secondary_nodes:
11101
            continue
11102
          nres.Raise("Can't get info from secondary node %s" % node,
11103
                     prereq=True, ecode=errors.ECODE_STATE)
11104
          if not isinstance(nres.payload.get("memory_free", None), int):
11105
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11106
                                       " memory information" % node,
11107
                                       errors.ECODE_STATE)
11108
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11109
            raise errors.OpPrereqError("This change will prevent the instance"
11110
                                       " from failover to its secondary node"
11111
                                       " %s, due to not enough memory" % node,
11112
                                       errors.ECODE_STATE)
11113

    
11114
    # NIC processing
11115
    self.nic_pnew = {}
11116
    self.nic_pinst = {}
11117
    for nic_op, nic_dict in self.op.nics:
11118
      if nic_op == constants.DDM_REMOVE:
11119
        if not instance.nics:
11120
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11121
                                     errors.ECODE_INVAL)
11122
        continue
11123
      if nic_op != constants.DDM_ADD:
11124
        # an existing nic
11125
        if not instance.nics:
11126
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11127
                                     " no NICs" % nic_op,
11128
                                     errors.ECODE_INVAL)
11129
        if nic_op < 0 or nic_op >= len(instance.nics):
11130
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11131
                                     " are 0 to %d" %
11132
                                     (nic_op, len(instance.nics) - 1),
11133
                                     errors.ECODE_INVAL)
11134
        old_nic_params = instance.nics[nic_op].nicparams
11135
        old_nic_ip = instance.nics[nic_op].ip
11136
      else:
11137
        old_nic_params = {}
11138
        old_nic_ip = None
11139

    
11140
      update_params_dict = dict([(key, nic_dict[key])
11141
                                 for key in constants.NICS_PARAMETERS
11142
                                 if key in nic_dict])
11143

    
11144
      if "bridge" in nic_dict:
11145
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11146

    
11147
      new_nic_params = _GetUpdatedParams(old_nic_params,
11148
                                         update_params_dict)
11149
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11150
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11151
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11152
      self.nic_pinst[nic_op] = new_nic_params
11153
      self.nic_pnew[nic_op] = new_filled_nic_params
11154
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11155

    
11156
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11157
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11158
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11159
        if msg:
11160
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11161
          if self.op.force:
11162
            self.warn.append(msg)
11163
          else:
11164
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11165
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11166
        if constants.INIC_IP in nic_dict:
11167
          nic_ip = nic_dict[constants.INIC_IP]
11168
        else:
11169
          nic_ip = old_nic_ip
11170
        if nic_ip is None:
11171
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11172
                                     " on a routed nic", errors.ECODE_INVAL)
11173
      if constants.INIC_MAC in nic_dict:
11174
        nic_mac = nic_dict[constants.INIC_MAC]
11175
        if nic_mac is None:
11176
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11177
                                     errors.ECODE_INVAL)
11178
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11179
          # otherwise generate the mac
11180
          nic_dict[constants.INIC_MAC] = \
11181
            self.cfg.GenerateMAC(self.proc.GetECId())
11182
        else:
11183
          # or validate/reserve the current one
11184
          try:
11185
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11186
          except errors.ReservationError:
11187
            raise errors.OpPrereqError("MAC address %s already in use"
11188
                                       " in cluster" % nic_mac,
11189
                                       errors.ECODE_NOTUNIQUE)
11190

    
11191
    # DISK processing
11192
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11193
      raise errors.OpPrereqError("Disk operations not supported for"
11194
                                 " diskless instances",
11195
                                 errors.ECODE_INVAL)
11196
    for disk_op, _ in self.op.disks:
11197
      if disk_op == constants.DDM_REMOVE:
11198
        if len(instance.disks) == 1:
11199
          raise errors.OpPrereqError("Cannot remove the last disk of"
11200
                                     " an instance", errors.ECODE_INVAL)
11201
        _CheckInstanceDown(self, instance, "cannot remove disks")
11202

    
11203
      if (disk_op == constants.DDM_ADD and
11204
          len(instance.disks) >= constants.MAX_DISKS):
11205
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11206
                                   " add more" % constants.MAX_DISKS,
11207
                                   errors.ECODE_STATE)
11208
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11209
        # an existing disk
11210
        if disk_op < 0 or disk_op >= len(instance.disks):
11211
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11212
                                     " are 0 to %d" %
11213
                                     (disk_op, len(instance.disks) - 1),
11214
                                     errors.ECODE_INVAL)
11215

    
11216
    return
11217

    
11218
  def _ConvertPlainToDrbd(self, feedback_fn):
11219
    """Converts an instance from plain to drbd.
11220

11221
    """
11222
    feedback_fn("Converting template to drbd")
11223
    instance = self.instance
11224
    pnode = instance.primary_node
11225
    snode = self.op.remote_node
11226

    
11227
    # create a fake disk info for _GenerateDiskTemplate
11228
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11229
                  constants.IDISK_VG: d.logical_id[0]}
11230
                 for d in instance.disks]
11231
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11232
                                      instance.name, pnode, [snode],
11233
                                      disk_info, None, None, 0, feedback_fn)
11234
    info = _GetInstanceInfoText(instance)
11235
    feedback_fn("Creating aditional volumes...")
11236
    # first, create the missing data and meta devices
11237
    for disk in new_disks:
11238
      # unfortunately this is... not too nice
11239
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11240
                            info, True)
11241
      for child in disk.children:
11242
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11243
    # at this stage, all new LVs have been created, we can rename the
11244
    # old ones
11245
    feedback_fn("Renaming original volumes...")
11246
    rename_list = [(o, n.children[0].logical_id)
11247
                   for (o, n) in zip(instance.disks, new_disks)]
11248
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11249
    result.Raise("Failed to rename original LVs")
11250

    
11251
    feedback_fn("Initializing DRBD devices...")
11252
    # all child devices are in place, we can now create the DRBD devices
11253
    for disk in new_disks:
11254
      for node in [pnode, snode]:
11255
        f_create = node == pnode
11256
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11257

    
11258
    # at this point, the instance has been modified
11259
    instance.disk_template = constants.DT_DRBD8
11260
    instance.disks = new_disks
11261
    self.cfg.Update(instance, feedback_fn)
11262

    
11263
    # disks are created, waiting for sync
11264
    disk_abort = not _WaitForSync(self, instance,
11265
                                  oneshot=not self.op.wait_for_sync)
11266
    if disk_abort:
11267
      raise errors.OpExecError("There are some degraded disks for"
11268
                               " this instance, please cleanup manually")
11269

    
11270
  def _ConvertDrbdToPlain(self, feedback_fn):
11271
    """Converts an instance from drbd to plain.
11272

11273
    """
11274
    instance = self.instance
11275
    assert len(instance.secondary_nodes) == 1
11276
    pnode = instance.primary_node
11277
    snode = instance.secondary_nodes[0]
11278
    feedback_fn("Converting template to plain")
11279

    
11280
    old_disks = instance.disks
11281
    new_disks = [d.children[0] for d in old_disks]
11282

    
11283
    # copy over size and mode
11284
    for parent, child in zip(old_disks, new_disks):
11285
      child.size = parent.size
11286
      child.mode = parent.mode
11287

    
11288
    # update instance structure
11289
    instance.disks = new_disks
11290
    instance.disk_template = constants.DT_PLAIN
11291
    self.cfg.Update(instance, feedback_fn)
11292

    
11293
    feedback_fn("Removing volumes on the secondary node...")
11294
    for disk in old_disks:
11295
      self.cfg.SetDiskID(disk, snode)
11296
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11297
      if msg:
11298
        self.LogWarning("Could not remove block device %s on node %s,"
11299
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11300

    
11301
    feedback_fn("Removing unneeded volumes on the primary node...")
11302
    for idx, disk in enumerate(old_disks):
11303
      meta = disk.children[1]
11304
      self.cfg.SetDiskID(meta, pnode)
11305
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11306
      if msg:
11307
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11308
                        " continuing anyway: %s", idx, pnode, msg)
11309

    
11310
  def Exec(self, feedback_fn):
11311
    """Modifies an instance.
11312

11313
    All parameters take effect only at the next restart of the instance.
11314

11315
    """
11316
    # Process here the warnings from CheckPrereq, as we don't have a
11317
    # feedback_fn there.
11318
    for warn in self.warn:
11319
      feedback_fn("WARNING: %s" % warn)
11320

    
11321
    result = []
11322
    instance = self.instance
11323
    # disk changes
11324
    for disk_op, disk_dict in self.op.disks:
11325
      if disk_op == constants.DDM_REMOVE:
11326
        # remove the last disk
11327
        device = instance.disks.pop()
11328
        device_idx = len(instance.disks)
11329
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11330
          self.cfg.SetDiskID(disk, node)
11331
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11332
          if msg:
11333
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11334
                            " continuing anyway", device_idx, node, msg)
11335
        result.append(("disk/%d" % device_idx, "remove"))
11336
      elif disk_op == constants.DDM_ADD:
11337
        # add a new disk
11338
        if instance.disk_template in (constants.DT_FILE,
11339
                                        constants.DT_SHARED_FILE):
11340
          file_driver, file_path = instance.disks[0].logical_id
11341
          file_path = os.path.dirname(file_path)
11342
        else:
11343
          file_driver = file_path = None
11344
        disk_idx_base = len(instance.disks)
11345
        new_disk = _GenerateDiskTemplate(self,
11346
                                         instance.disk_template,
11347
                                         instance.name, instance.primary_node,
11348
                                         instance.secondary_nodes,
11349
                                         [disk_dict],
11350
                                         file_path,
11351
                                         file_driver,
11352
                                         disk_idx_base, feedback_fn)[0]
11353
        instance.disks.append(new_disk)
11354
        info = _GetInstanceInfoText(instance)
11355

    
11356
        logging.info("Creating volume %s for instance %s",
11357
                     new_disk.iv_name, instance.name)
11358
        # Note: this needs to be kept in sync with _CreateDisks
11359
        #HARDCODE
11360
        for node in instance.all_nodes:
11361
          f_create = node == instance.primary_node
11362
          try:
11363
            _CreateBlockDev(self, node, instance, new_disk,
11364
                            f_create, info, f_create)
11365
          except errors.OpExecError, err:
11366
            self.LogWarning("Failed to create volume %s (%s) on"
11367
                            " node %s: %s",
11368
                            new_disk.iv_name, new_disk, node, err)
11369
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11370
                       (new_disk.size, new_disk.mode)))
11371
      else:
11372
        # change a given disk
11373
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11374
        result.append(("disk.mode/%d" % disk_op,
11375
                       disk_dict[constants.IDISK_MODE]))
11376

    
11377
    if self.op.disk_template:
11378
      r_shut = _ShutdownInstanceDisks(self, instance)
11379
      if not r_shut:
11380
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11381
                                 " proceed with disk template conversion")
11382
      mode = (instance.disk_template, self.op.disk_template)
11383
      try:
11384
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11385
      except:
11386
        self.cfg.ReleaseDRBDMinors(instance.name)
11387
        raise
11388
      result.append(("disk_template", self.op.disk_template))
11389

    
11390
    # NIC changes
11391
    for nic_op, nic_dict in self.op.nics:
11392
      if nic_op == constants.DDM_REMOVE:
11393
        # remove the last nic
11394
        del instance.nics[-1]
11395
        result.append(("nic.%d" % len(instance.nics), "remove"))
11396
      elif nic_op == constants.DDM_ADD:
11397
        # mac and bridge should be set by now
11398
        mac = nic_dict[constants.INIC_MAC]
11399
        ip = nic_dict.get(constants.INIC_IP, None)
11400
        nicparams = self.nic_pinst[constants.DDM_ADD]
11401
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11402
        instance.nics.append(new_nic)
11403
        result.append(("nic.%d" % (len(instance.nics) - 1),
11404
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11405
                       (new_nic.mac, new_nic.ip,
11406
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11407
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11408
                       )))
11409
      else:
11410
        for key in (constants.INIC_MAC, constants.INIC_IP):
11411
          if key in nic_dict:
11412
            setattr(instance.nics[nic_op], key, nic_dict[key])
11413
        if nic_op in self.nic_pinst:
11414
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11415
        for key, val in nic_dict.iteritems():
11416
          result.append(("nic.%s/%d" % (key, nic_op), val))
11417

    
11418
    # hvparams changes
11419
    if self.op.hvparams:
11420
      instance.hvparams = self.hv_inst
11421
      for key, val in self.op.hvparams.iteritems():
11422
        result.append(("hv/%s" % key, val))
11423

    
11424
    # beparams changes
11425
    if self.op.beparams:
11426
      instance.beparams = self.be_inst
11427
      for key, val in self.op.beparams.iteritems():
11428
        result.append(("be/%s" % key, val))
11429

    
11430
    # OS change
11431
    if self.op.os_name:
11432
      instance.os = self.op.os_name
11433

    
11434
    # osparams changes
11435
    if self.op.osparams:
11436
      instance.osparams = self.os_inst
11437
      for key, val in self.op.osparams.iteritems():
11438
        result.append(("os/%s" % key, val))
11439

    
11440
    self.cfg.Update(instance, feedback_fn)
11441

    
11442
    return result
11443

    
11444
  _DISK_CONVERSIONS = {
11445
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11446
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11447
    }
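  # _DISK_CONVERSIONS maps (current_template, requested_template) pairs to the
  # helpers above; Exec() dispatches through
  # self._DISK_CONVERSIONS[(instance.disk_template, self.op.disk_template)],
  # so only plain<->drbd8 conversions are supported by this LU.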
11448

    
11449

    
11450
class LUInstanceChangeGroup(LogicalUnit):
11451
  HPATH = "instance-change-group"
11452
  HTYPE = constants.HTYPE_INSTANCE
11453
  REQ_BGL = False
11454

    
11455
  def ExpandNames(self):
11456
    self.share_locks = _ShareAll()
11457
    self.needed_locks = {
11458
      locking.LEVEL_NODEGROUP: [],
11459
      locking.LEVEL_NODE: [],
11460
      }
11461

    
11462
    self._ExpandAndLockInstance()
11463

    
11464
    if self.op.target_groups:
11465
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11466
                                  self.op.target_groups)
11467
    else:
11468
      self.req_target_uuids = None
11469

    
11470
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11471

    
11472
  def DeclareLocks(self, level):
11473
    if level == locking.LEVEL_NODEGROUP:
11474
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11475

    
11476
      if self.req_target_uuids:
11477
        lock_groups = set(self.req_target_uuids)
11478

    
11479
        # Lock all groups used by instance optimistically; this requires going
11480
        # via the node before it's locked, requiring verification later on
11481
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11482
        lock_groups.update(instance_groups)
11483
      else:
11484
        # No target groups, need to lock all of them
11485
        lock_groups = locking.ALL_SET
11486

    
11487
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11488

    
11489
    elif level == locking.LEVEL_NODE:
11490
      if self.req_target_uuids:
11491
        # Lock all nodes used by instances
11492
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11493
        self._LockInstancesNodes()
11494

    
11495
        # Lock all nodes in all potential target groups
11496
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11497
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11498
        member_nodes = [node_name
11499
                        for group in lock_groups
11500
                        for node_name in self.cfg.GetNodeGroup(group).members]
11501
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11502
      else:
11503
        # Lock all nodes as all groups are potential targets
11504
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11505

    
11506
  def CheckPrereq(self):
11507
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11508
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11509
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11510

    
11511
    assert (self.req_target_uuids is None or
11512
            owned_groups.issuperset(self.req_target_uuids))
11513
    assert owned_instances == set([self.op.instance_name])
11514

    
11515
    # Get instance information
11516
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11517

    
11518
    # Check if node groups for locked instance are still correct
11519
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11520
      ("Instance %s's nodes changed while we kept the lock" %
11521
       self.op.instance_name)
11522

    
11523
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11524
                                           owned_groups)
11525

    
11526
    if self.req_target_uuids:
11527
      # User requested specific target groups
11528
      self.target_uuids = self.req_target_uuids
11529
    else:
11530
      # All groups except those used by the instance are potential targets
11531
      self.target_uuids = owned_groups - inst_groups
11532

    
11533
    conflicting_groups = self.target_uuids & inst_groups
11534
    if conflicting_groups:
11535
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11536
                                 " used by the instance '%s'" %
11537
                                 (utils.CommaJoin(conflicting_groups),
11538
                                  self.op.instance_name),
11539
                                 errors.ECODE_INVAL)
11540

    
11541
    if not self.target_uuids:
11542
      raise errors.OpPrereqError("There are no possible target groups",
11543
                                 errors.ECODE_INVAL)
11544

    
11545
  def BuildHooksEnv(self):
11546
    """Build hooks env.
11547

11548
    """
11549
    assert self.target_uuids
11550

    
11551
    env = {
11552
      "TARGET_GROUPS": " ".join(self.target_uuids),
11553
      }
11554

    
11555
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11556

    
11557
    return env
11558

    
11559
  def BuildHooksNodes(self):
11560
    """Build hooks nodes.
11561

11562
    """
11563
    mn = self.cfg.GetMasterNode()
11564
    return ([mn], [mn])
11565

    
11566
  def Exec(self, feedback_fn):
11567
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11568

    
11569
    assert instances == [self.op.instance_name], "Instance not locked"
11570

    
11571
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11572
                     instances=instances, target_groups=list(self.target_uuids))
11573

    
11574
    ial.Run(self.op.iallocator)
11575

    
11576
    if not ial.success:
11577
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11578
                                 " instance '%s' using iallocator '%s': %s" %
11579
                                 (self.op.instance_name, self.op.iallocator,
11580
                                  ial.info),
11581
                                 errors.ECODE_NORES)
11582

    
11583
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11584

    
11585
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11586
                 " instance '%s'", len(jobs), self.op.instance_name)
11587

    
11588
    return ResultWithJobs(jobs)
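    # Illustrative client-side usage only (field names assumed to match the
    # attributes read above):
    #   opcodes.OpInstanceChangeGroup(instance_name="inst1.example.com",
    #                                 target_groups=["other-group"],
    #                                 iallocator="hail")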
11589

    
11590

    
11591
class LUBackupQuery(NoHooksLU):
11592
  """Query the exports list
11593

11594
  """
11595
  REQ_BGL = False
11596

    
11597
  def ExpandNames(self):
11598
    self.needed_locks = {}
11599
    self.share_locks[locking.LEVEL_NODE] = 1
11600
    if not self.op.nodes:
11601
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11602
    else:
11603
      self.needed_locks[locking.LEVEL_NODE] = \
11604
        _GetWantedNodes(self, self.op.nodes)
11605

    
11606
  def Exec(self, feedback_fn):
11607
    """Compute the list of all the exported system images.
11608

11609
    @rtype: dict
11610
    @return: a dictionary with the structure node->(export-list)
11611
        where export-list is a list of the instances exported on
11612
        that node.
11613

11614
    """
11615
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11616
    rpcresult = self.rpc.call_export_list(self.nodes)
11617
    result = {}
11618
    for node in rpcresult:
11619
      if rpcresult[node].fail_msg:
11620
        result[node] = False
11621
      else:
11622
        result[node] = rpcresult[node].payload
11623

    
11624
    return result
11625

    
11626

    
11627
class LUBackupPrepare(NoHooksLU):
11628
  """Prepares an instance for an export and returns useful information.
11629

11630
  """
11631
  REQ_BGL = False
11632

    
11633
  def ExpandNames(self):
11634
    self._ExpandAndLockInstance()
11635

    
11636
  def CheckPrereq(self):
11637
    """Check prerequisites.
11638

11639
    """
11640
    instance_name = self.op.instance_name
11641

    
11642
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11643
    assert self.instance is not None, \
11644
          "Cannot retrieve locked instance %s" % self.op.instance_name
11645
    _CheckNodeOnline(self, self.instance.primary_node)
11646

    
11647
    self._cds = _GetClusterDomainSecret()
11648

    
11649
  def Exec(self, feedback_fn):
11650
    """Prepares an instance for an export.
11651

11652
    """
11653
    instance = self.instance
11654

    
11655
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11656
      salt = utils.GenerateSecret(8)
11657

    
11658
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11659
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11660
                                              constants.RIE_CERT_VALIDITY)
11661
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11662

    
11663
      (name, cert_pem) = result.payload
11664

    
11665
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11666
                                             cert_pem)
11667

    
11668
      return {
11669
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11670
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11671
                          salt),
11672
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11673
        }
11674

    
11675
    return None
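    # For remote exports the dictionary above carries the import/export
    # handshake, the HMAC-signed X509 key name and the signed CA; local
    # exports need no preparation, hence the bare None. (How the destination
    # consumes these values is assumed, not shown in this module.)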
11676

    
11677

    
11678
class LUBackupExport(LogicalUnit):
11679
  """Export an instance to an image in the cluster.
11680

11681
  """
11682
  HPATH = "instance-export"
11683
  HTYPE = constants.HTYPE_INSTANCE
11684
  REQ_BGL = False
11685

    
11686
  def CheckArguments(self):
11687
    """Check the arguments.
11688

11689
    """
11690
    self.x509_key_name = self.op.x509_key_name
11691
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11692

    
11693
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11694
      if not self.x509_key_name:
11695
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11696
                                   errors.ECODE_INVAL)
11697

    
11698
      if not self.dest_x509_ca_pem:
11699
        raise errors.OpPrereqError("Missing destination X509 CA",
11700
                                   errors.ECODE_INVAL)
11701

    
11702
  def ExpandNames(self):
11703
    self._ExpandAndLockInstance()
11704

    
11705
    # Lock all nodes for local exports
11706
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11707
      # FIXME: lock only instance primary and destination node
11708
      #
11709
      # Sad but true, for now we have to lock all nodes, as we don't know where
11710
      # the previous export might be, and in this LU we search for it and
11711
      # remove it from its current node. In the future we could fix this by:
11712
      #  - making a tasklet to search (share-lock all), then create the
11713
      #    new one, then one to remove, after
11714
      #  - removing the removal operation altogether
11715
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11716

    
11717
  def DeclareLocks(self, level):
11718
    """Last minute lock declaration."""
11719
    # All nodes are locked anyway, so nothing to do here.
11720

    
11721
  def BuildHooksEnv(self):
11722
    """Build hooks env.
11723

11724
    This will run on the master, primary node and target node.
11725

11726
    """
11727
    env = {
11728
      "EXPORT_MODE": self.op.mode,
11729
      "EXPORT_NODE": self.op.target_node,
11730
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11731
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11732
      # TODO: Generic function for boolean env variables
11733
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11734
      }
11735

    
11736
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11737

    
11738
    return env
11739

    
11740
  def BuildHooksNodes(self):
11741
    """Build hooks nodes.
11742

11743
    """
11744
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11745

    
11746
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11747
      nl.append(self.op.target_node)
11748

    
11749
    return (nl, nl)
11750

    
11751
  def CheckPrereq(self):
11752
    """Check prerequisites.
11753

11754
    This checks that the instance and node names are valid.
11755

11756
    """
11757
    instance_name = self.op.instance_name
11758

    
11759
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11760
    assert self.instance is not None, \
11761
          "Cannot retrieve locked instance %s" % self.op.instance_name
11762
    _CheckNodeOnline(self, self.instance.primary_node)
11763

    
11764
    if (self.op.remove_instance and self.instance.admin_up and
11765
        not self.op.shutdown):
11766
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11767
                                 " down before")
11768

    
11769
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11770
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11771
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11772
      assert self.dst_node is not None
11773

    
11774
      _CheckNodeOnline(self, self.dst_node.name)
11775
      _CheckNodeNotDrained(self, self.dst_node.name)
11776

    
11777
      self._cds = None
11778
      self.dest_disk_info = None
11779
      self.dest_x509_ca = None
11780

    
11781
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11782
      self.dst_node = None
11783

    
11784
      if len(self.op.target_node) != len(self.instance.disks):
11785
        raise errors.OpPrereqError(("Received destination information for %s"
11786
                                    " disks, but instance %s has %s disks") %
11787
                                   (len(self.op.target_node), instance_name,
11788
                                    len(self.instance.disks)),
11789
                                   errors.ECODE_INVAL)
11790

    
11791
      cds = _GetClusterDomainSecret()
11792

    
11793
      # Check X509 key name
11794
      try:
11795
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11796
      except (TypeError, ValueError), err:
11797
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11798

    
11799
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11800
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11801
                                   errors.ECODE_INVAL)
11802

    
11803
      # Load and verify CA
11804
      try:
11805
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11806
      except OpenSSL.crypto.Error, err:
11807
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11808
                                   (err, ), errors.ECODE_INVAL)
11809

    
11810
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11811
      if errcode is not None:
11812
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11813
                                   (msg, ), errors.ECODE_INVAL)
11814

    
11815
      self.dest_x509_ca = cert
11816

    
11817
      # Verify target information
11818
      disk_info = []
11819
      for idx, disk_data in enumerate(self.op.target_node):
11820
        try:
11821
          (host, port, magic) = \
11822
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11823
        except errors.GenericError, err:
11824
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11825
                                     (idx, err), errors.ECODE_INVAL)
11826

    
11827
        disk_info.append((host, port, magic))
11828

    
11829
      assert len(disk_info) == len(self.op.target_node)
11830
      self.dest_disk_info = disk_info
11831

    
11832
    else:
11833
      raise errors.ProgrammerError("Unhandled export mode %r" %
11834
                                   self.op.mode)
11835

    
11836
    # instance disk type verification
11837
    # TODO: Implement export support for file-based disks
11838
    for disk in self.instance.disks:
11839
      if disk.dev_type == constants.LD_FILE:
11840
        raise errors.OpPrereqError("Export not supported for instances with"
11841
                                   " file-based disks", errors.ECODE_INVAL)
11842

    
11843
  def _CleanupExports(self, feedback_fn):
11844
    """Removes exports of current instance from all other nodes.
11845

11846
    If an instance in a cluster with nodes A..D was exported to node C, its
11847
    exports will be removed from the nodes A, B and D.
11848

11849
    """
11850
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11851

    
11852
    nodelist = self.cfg.GetNodeList()
11853
    nodelist.remove(self.dst_node.name)
11854

    
11855
    # on one-node clusters nodelist will be empty after the removal
11856
    # if we proceed the backup would be removed because OpBackupQuery
11857
    # substitutes an empty list with the full cluster node list.
11858
    iname = self.instance.name
11859
    if nodelist:
11860
      feedback_fn("Removing old exports for instance %s" % iname)
11861
      exportlist = self.rpc.call_export_list(nodelist)
11862
      for node in exportlist:
11863
        if exportlist[node].fail_msg:
11864
          continue
11865
        if iname in exportlist[node].payload:
11866
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11867
          if msg:
11868
            self.LogWarning("Could not remove older export for instance %s"
11869
                            " on node %s: %s", iname, node, msg)
11870

    
11871
  def Exec(self, feedback_fn):
11872
    """Export an instance to an image in the cluster.
11873

11874
    """
11875
    assert self.op.mode in constants.EXPORT_MODES
11876

    
11877
    instance = self.instance
11878
    src_node = instance.primary_node
11879

    
11880
    if self.op.shutdown:
11881
      # shutdown the instance, but not the disks
11882
      feedback_fn("Shutting down instance %s" % instance.name)
11883
      result = self.rpc.call_instance_shutdown(src_node, instance,
11884
                                               self.op.shutdown_timeout)
11885
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11886
      result.Raise("Could not shutdown instance %s on"
11887
                   " node %s" % (instance.name, src_node))
11888

    
11889
    # set the disks ID correctly since call_instance_start needs the
11890
    # correct drbd minor to create the symlinks
11891
    for disk in instance.disks:
11892
      self.cfg.SetDiskID(disk, src_node)
11893

    
11894
    activate_disks = (not instance.admin_up)
11895

    
11896
    if activate_disks:
11897
      # Activate the instance disks if we're exporting a stopped instance
11898
      feedback_fn("Activating disks for %s" % instance.name)
11899
      _StartInstanceDisks(self, instance, None)
11900

    
11901
    try:
11902
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11903
                                                     instance)
11904

    
11905
      helper.CreateSnapshots()
11906
      try:
11907
        if (self.op.shutdown and instance.admin_up and
11908
            not self.op.remove_instance):
11909
          assert not activate_disks
11910
          feedback_fn("Starting instance %s" % instance.name)
11911
          result = self.rpc.call_instance_start(src_node,
11912
                                                (instance, None, None), False)
11913
          msg = result.fail_msg
11914
          if msg:
11915
            feedback_fn("Failed to start instance: %s" % msg)
11916
            _ShutdownInstanceDisks(self, instance)
11917
            raise errors.OpExecError("Could not start instance: %s" % msg)
11918

    
11919
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11920
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11921
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11922
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11923
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11924

    
11925
          (key_name, _, _) = self.x509_key_name
11926

    
11927
          dest_ca_pem = \
11928
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11929
                                            self.dest_x509_ca)
11930

    
11931
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11932
                                                     key_name, dest_ca_pem,
11933
                                                     timeouts)
11934
      finally:
11935
        helper.Cleanup()
11936

    
11937
      # Check for backwards compatibility
11938
      assert len(dresults) == len(instance.disks)
11939
      assert compat.all(isinstance(i, bool) for i in dresults), \
11940
             "Not all results are boolean: %r" % dresults
11941

    
11942
    finally:
11943
      if activate_disks:
11944
        feedback_fn("Deactivating disks for %s" % instance.name)
11945
        _ShutdownInstanceDisks(self, instance)
11946

    
11947
    if not (compat.all(dresults) and fin_resu):
11948
      failures = []
11949
      if not fin_resu:
11950
        failures.append("export finalization")
11951
      if not compat.all(dresults):
11952
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11953
                               if not dsk)
11954
        failures.append("disk export: disk(s) %s" % fdsk)
11955

    
11956
      raise errors.OpExecError("Export failed, errors in %s" %
11957
                               utils.CommaJoin(failures))
11958

    
11959
    # At this point, the export was successful, we can cleanup/finish
11960

    
11961
    # Remove instance if requested
11962
    if self.op.remove_instance:
11963
      feedback_fn("Removing instance %s" % instance.name)
11964
      _RemoveInstance(self, feedback_fn, instance,
11965
                      self.op.ignore_remove_failures)
11966

    
11967
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11968
      self._CleanupExports(feedback_fn)
11969

    
11970
    return fin_resu, dresults
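    # The result pairs the overall finalization status with one boolean per
    # instance disk (see the assertions above); any failure has already been
    # turned into an OpExecError, so reaching this point means the export
    # succeeded.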
11971

    
11972

    
11973
class LUBackupRemove(NoHooksLU):
11974
  """Remove exports related to the named instance.
11975

11976
  """
11977
  REQ_BGL = False
11978

    
11979
  def ExpandNames(self):
11980
    self.needed_locks = {}
11981
    # We need all nodes to be locked in order for RemoveExport to work, but we
11982
    # don't need to lock the instance itself, as nothing will happen to it (and
11983
    # we can remove exports also for a removed instance)
11984
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11985

    
11986
  def Exec(self, feedback_fn):
11987
    """Remove any export.
11988

11989
    """
11990
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11991
    # If the instance was not found we'll try with the name that was passed in.
11992
    # This will only work if it was an FQDN, though.
11993
    fqdn_warn = False
11994
    if not instance_name:
11995
      fqdn_warn = True
11996
      instance_name = self.op.instance_name
11997

    
11998
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11999
    exportlist = self.rpc.call_export_list(locked_nodes)
12000
    found = False
12001
    for node in exportlist:
12002
      msg = exportlist[node].fail_msg
12003
      if msg:
12004
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12005
        continue
12006
      if instance_name in exportlist[node].payload:
12007
        found = True
12008
        result = self.rpc.call_export_remove(node, instance_name)
12009
        msg = result.fail_msg
12010
        if msg:
12011
          logging.error("Could not remove export for instance %s"
12012
                        " on node %s: %s", instance_name, node, msg)
12013

    
12014
    if fqdn_warn and not found:
12015
      feedback_fn("Export not found. If trying to remove an export belonging"
12016
                  " to a deleted instance please use its Fully Qualified"
12017
                  " Domain Name.")
12018

    
12019

    
12020
class LUGroupAdd(LogicalUnit):
12021
  """Logical unit for creating node groups.
12022

12023
  """
12024
  HPATH = "group-add"
12025
  HTYPE = constants.HTYPE_GROUP
12026
  REQ_BGL = False
12027

    
12028
  def ExpandNames(self):
12029
    # We need the new group's UUID here so that we can create and acquire the
12030
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12031
    # that it should not check whether the UUID exists in the configuration.
12032
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12033
    self.needed_locks = {}
12034
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12035

    
12036
  def CheckPrereq(self):
12037
    """Check prerequisites.
12038

12039
    This checks that the given group name is not an existing node group
12040
    already.
12041

12042
    """
12043
    try:
12044
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12045
    except errors.OpPrereqError:
12046
      pass
12047
    else:
12048
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12049
                                 " node group (UUID: %s)" %
12050
                                 (self.op.group_name, existing_uuid),
12051
                                 errors.ECODE_EXISTS)
12052

    
12053
    if self.op.ndparams:
12054
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12055

    
12056
  def BuildHooksEnv(self):
12057
    """Build hooks env.
12058

12059
    """
12060
    return {
12061
      "GROUP_NAME": self.op.group_name,
12062
      }
12063

    
12064
  def BuildHooksNodes(self):
12065
    """Build hooks nodes.
12066

12067
    """
12068
    mn = self.cfg.GetMasterNode()
12069
    return ([mn], [mn])
12070

    
12071
  def Exec(self, feedback_fn):
12072
    """Add the node group to the cluster.
12073

12074
    """
12075
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12076
                                  uuid=self.group_uuid,
12077
                                  alloc_policy=self.op.alloc_policy,
12078
                                  ndparams=self.op.ndparams)
12079

    
12080
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12081
    del self.remove_locks[locking.LEVEL_NODEGROUP]
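    # Illustrative usage only (field names assumed to match the attributes
    # used above):
    #   opcodes.OpGroupAdd(group_name="rack2",
    #                      alloc_policy=constants.ALLOC_POLICY_PREFERRED)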
12082

    
12083

    
12084
class LUGroupAssignNodes(NoHooksLU):
12085
  """Logical unit for assigning nodes to groups.
12086

12087
  """
12088
  REQ_BGL = False
12089

    
12090
  def ExpandNames(self):
12091
    # These raise errors.OpPrereqError on their own:
12092
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12093
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12094

    
12095
    # We want to lock all the affected nodes and groups. We have readily
12096
    # available the list of nodes, and the *destination* group. To gather the
12097
    # list of "source" groups, we need to fetch node information later on.
12098
    self.needed_locks = {
12099
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12100
      locking.LEVEL_NODE: self.op.nodes,
12101
      }
12102

    
12103
  def DeclareLocks(self, level):
12104
    if level == locking.LEVEL_NODEGROUP:
12105
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12106

    
12107
      # Try to get all affected nodes' groups without having the group or node
12108
      # lock yet. Needs verification later in the code flow.
12109
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12110

    
12111
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12112

    
12113
  def CheckPrereq(self):
12114
    """Check prerequisites.
12115

12116
    """
12117
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12118
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12119
            frozenset(self.op.nodes))
12120

    
12121
    expected_locks = (set([self.group_uuid]) |
12122
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12123
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12124
    if actual_locks != expected_locks:
12125
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12126
                               " current groups are '%s', used to be '%s'" %
12127
                               (utils.CommaJoin(expected_locks),
12128
                                utils.CommaJoin(actual_locks)))
12129

    
12130
    self.node_data = self.cfg.GetAllNodesInfo()
12131
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12132
    instance_data = self.cfg.GetAllInstancesInfo()
12133

    
12134
    if self.group is None:
12135
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12136
                               (self.op.group_name, self.group_uuid))
12137

    
12138
    (new_splits, previous_splits) = \
12139
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12140
                                             for node in self.op.nodes],
12141
                                            self.node_data, instance_data)
12142

    
12143
    if new_splits:
12144
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12145

    
12146
      if not self.op.force:
12147
        raise errors.OpExecError("The following instances get split by this"
12148
                                 " change and --force was not given: %s" %
12149
                                 fmt_new_splits)
12150
      else:
12151
        self.LogWarning("This operation will split the following instances: %s",
12152
                        fmt_new_splits)
12153

    
12154
        if previous_splits:
12155
          self.LogWarning("In addition, these already-split instances continue"
12156
                          " to be split across groups: %s",
12157
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12158

    
12159
  def Exec(self, feedback_fn):
12160
    """Assign nodes to a new group.
12161

12162
    """
12163
    for node in self.op.nodes:
12164
      self.node_data[node].group = self.group_uuid
12165

    
12166
    # FIXME: Depends on side-effects of modifying the result of
12167
    # C{cfg.GetAllNodesInfo}
12168

    
12169
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12170

    
12171
  @staticmethod
12172
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12173
    """Check for split instances after a node assignment.
12174

12175
    This method considers a series of node assignments as an atomic operation,
12176
    and returns information about split instances after applying the set of
12177
    changes.
12178

12179
    In particular, it returns information about newly split instances, and
12180
    instances that were already split, and remain so after the change.
12181

12182
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12183
    considered.
12184

12185
    @type changes: list of (node_name, new_group_uuid) pairs.
12186
    @param changes: list of node assignments to consider.
12187
    @param node_data: a dict with data for all nodes
12188
    @param instance_data: a dict with all instances to consider
12189
    @rtype: a two-tuple
12190
    @return: a pair of lists: the instances that were previously okay but
      become split as a consequence of this change, and the instances that
      were already split and remain split after the change.
12193

12194
    """
12195
    changed_nodes = dict((node, group) for node, group in changes
12196
                         if node_data[node].group != group)
12197

    
12198
    all_split_instances = set()
12199
    previously_split_instances = set()
12200

    
12201
    def InstanceNodes(instance):
12202
      return [instance.primary_node] + list(instance.secondary_nodes)
12203

    
12204
    for inst in instance_data.values():
12205
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12206
        continue
12207

    
12208
      instance_nodes = InstanceNodes(inst)
12209

    
12210
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12211
        previously_split_instances.add(inst.name)
12212

    
12213
      if len(set(changed_nodes.get(node, node_data[node].group)
12214
                 for node in instance_nodes)) > 1:
12215
        all_split_instances.add(inst.name)
12216

    
12217
    return (list(all_split_instances - previously_split_instances),
12218
            list(previously_split_instances & all_split_instances))
12219

    
12220

    
12221
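# Illustrative sketch, kept as a comment and not executed, of the split
# detection done by LUGroupAssignNodes.CheckAssignmentForSplitInstances
# above. The Node/Inst placeholders are assumptions standing in for the
# config objects; only the attributes read by the method are shown.
#
#   node_data = {"n1": Node(group="g1"), "n2": Node(group="g1")}
#   instance_data = {"i1": Inst(primary_node="n1", secondary_nodes=["n2"],
#                               disk_template=constants.DT_DRBD8)}
#   # Reassigning only "n1" to group "g2" splits "i1" across "g2" and "g1":
#   #   CheckAssignmentForSplitInstances([("n1", "g2")], node_data,
#   #                                    instance_data)
#   #   -> (["i1"], [])   # newly split instances, previously split instances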
class _GroupQuery(_QueryBase):
12222
  FIELDS = query.GROUP_FIELDS
12223

    
12224
  def ExpandNames(self, lu):
12225
    lu.needed_locks = {}
12226

    
12227
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12228
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12229

    
12230
    if not self.names:
12231
      self.wanted = [name_to_uuid[name]
12232
                     for name in utils.NiceSort(name_to_uuid.keys())]
12233
    else:
12234
      # Accept names to be either names or UUIDs.
12235
      missing = []
12236
      self.wanted = []
12237
      all_uuid = frozenset(self._all_groups.keys())
12238

    
12239
      for name in self.names:
12240
        if name in all_uuid:
12241
          self.wanted.append(name)
12242
        elif name in name_to_uuid:
12243
          self.wanted.append(name_to_uuid[name])
12244
        else:
12245
          missing.append(name)
12246

    
12247
      if missing:
12248
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12249
                                   utils.CommaJoin(missing),
12250
                                   errors.ECODE_NOENT)
12251

    
12252
  def DeclareLocks(self, lu, level):
12253
    pass
12254

    
12255
  def _GetQueryData(self, lu):
12256
    """Computes the list of node groups and their attributes.
12257

12258
    """
12259
    do_nodes = query.GQ_NODE in self.requested_data
12260
    do_instances = query.GQ_INST in self.requested_data
12261

    
12262
    group_to_nodes = None
12263
    group_to_instances = None
12264

    
12265
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12266
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12267
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12268
    # instance->node. Hence, we will need to process nodes even if we only need
12269
    # instance information.
12270
    if do_nodes or do_instances:
12271
      all_nodes = lu.cfg.GetAllNodesInfo()
12272
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12273
      node_to_group = {}
12274

    
12275
      for node in all_nodes.values():
12276
        if node.group in group_to_nodes:
12277
          group_to_nodes[node.group].append(node.name)
12278
          node_to_group[node.name] = node.group
12279

    
12280
      if do_instances:
12281
        all_instances = lu.cfg.GetAllInstancesInfo()
12282
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12283

    
12284
        for instance in all_instances.values():
12285
          node = instance.primary_node
12286
          if node in node_to_group:
12287
            group_to_instances[node_to_group[node]].append(instance.name)
12288

    
12289
        if not do_nodes:
12290
          # Do not pass on node information if it was not requested.
12291
          group_to_nodes = None
12292

    
12293
    return query.GroupQueryData([self._all_groups[uuid]
12294
                                 for uuid in self.wanted],
12295
                                group_to_nodes, group_to_instances)
12296

    
12297

    
12298
class LUGroupQuery(NoHooksLU):
12299
  """Logical unit for querying node groups.
12300

12301
  """
12302
  REQ_BGL = False
12303

    
12304
  def CheckArguments(self):
12305
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12306
                          self.op.output_fields, False)
12307

    
12308
  def ExpandNames(self):
12309
    self.gq.ExpandNames(self)
12310

    
12311
  def DeclareLocks(self, level):
12312
    self.gq.DeclareLocks(self, level)
12313

    
12314
  def Exec(self, feedback_fn):
12315
    return self.gq.OldStyleQuery(self)
12316

    
12317

    
12318
class LUGroupSetParams(LogicalUnit):
12319
  """Modifies the parameters of a node group.
12320

12321
  """
12322
  HPATH = "group-modify"
12323
  HTYPE = constants.HTYPE_GROUP
12324
  REQ_BGL = False
12325

    
12326
  def CheckArguments(self):
12327
    all_changes = [
12328
      self.op.ndparams,
12329
      self.op.alloc_policy,
12330
      ]
12331

    
12332
    if all_changes.count(None) == len(all_changes):
12333
      raise errors.OpPrereqError("Please pass at least one modification",
12334
                                 errors.ECODE_INVAL)
12335

    
12336
  def ExpandNames(self):
12337
    # This raises errors.OpPrereqError on its own:
12338
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12339

    
12340
    self.needed_locks = {
12341
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12342
      }
12343

    
12344
  def CheckPrereq(self):
12345
    """Check prerequisites.
12346

12347
    """
12348
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12349

    
12350
    if self.group is None:
12351
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12352
                               (self.op.group_name, self.group_uuid))
12353

    
12354
    if self.op.ndparams:
12355
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12356
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12357
      self.new_ndparams = new_ndparams
12358

    
12359
  def BuildHooksEnv(self):
12360
    """Build hooks env.
12361

12362
    """
12363
    return {
12364
      "GROUP_NAME": self.op.group_name,
12365
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12366
      }
12367

    
12368
  def BuildHooksNodes(self):
12369
    """Build hooks nodes.
12370

12371
    """
12372
    mn = self.cfg.GetMasterNode()
12373
    return ([mn], [mn])
12374

    
12375
  def Exec(self, feedback_fn):
12376
    """Modifies the node group.
12377

12378
    """
12379
    result = []
12380

    
12381
    if self.op.ndparams:
12382
      self.group.ndparams = self.new_ndparams
12383
      result.append(("ndparams", str(self.group.ndparams)))
12384

    
12385
    if self.op.alloc_policy:
12386
      self.group.alloc_policy = self.op.alloc_policy
12387

    
12388
    self.cfg.Update(self.group, feedback_fn)
12389
    return result
12390

    
12391

    
12392
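# Illustrative note, kept as a comment: LUGroupSetParams above returns a list
# of (parameter, new value) pairs describing what was actually changed, e.g.
# roughly [("ndparams", "{...}")] after a node-parameter update; the concrete
# values shown to clients depend on the parameters passed in. At least one of
# ndparams/alloc_policy must be given, otherwise CheckArguments raises
# OpPrereqError.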
class LUGroupRemove(LogicalUnit):
12393
  HPATH = "group-remove"
12394
  HTYPE = constants.HTYPE_GROUP
12395
  REQ_BGL = False
12396

    
12397
  def ExpandNames(self):
12398
    # This raises errors.OpPrereqError on its own:
12399
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12400
    self.needed_locks = {
12401
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12402
      }
12403

    
12404
  def CheckPrereq(self):
12405
    """Check prerequisites.
12406

12407
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
12410

12411
    """
12412
    # Verify that the group is empty.
12413
    group_nodes = [node.name
12414
                   for node in self.cfg.GetAllNodesInfo().values()
12415
                   if node.group == self.group_uuid]
12416

    
12417
    if group_nodes:
12418
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12419
                                 " nodes: %s" %
12420
                                 (self.op.group_name,
12421
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12422
                                 errors.ECODE_STATE)
12423

    
12424
    # Verify the cluster would not be left group-less.
12425
    if len(self.cfg.GetNodeGroupList()) == 1:
12426
      raise errors.OpPrereqError("Group '%s' is the only group,"
12427
                                 " cannot be removed" %
12428
                                 self.op.group_name,
12429
                                 errors.ECODE_STATE)
12430

    
12431
  def BuildHooksEnv(self):
12432
    """Build hooks env.
12433

12434
    """
12435
    return {
12436
      "GROUP_NAME": self.op.group_name,
12437
      }
12438

    
12439
  def BuildHooksNodes(self):
12440
    """Build hooks nodes.
12441

12442
    """
12443
    mn = self.cfg.GetMasterNode()
12444
    return ([mn], [mn])
12445

    
12446
  def Exec(self, feedback_fn):
12447
    """Remove the node group.
12448

12449
    """
12450
    try:
12451
      self.cfg.RemoveNodeGroup(self.group_uuid)
12452
    except errors.ConfigurationError:
12453
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12454
                               (self.op.group_name, self.group_uuid))
12455

    
12456
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12457

    
12458

    
12459
class LUGroupRename(LogicalUnit):
12460
  HPATH = "group-rename"
12461
  HTYPE = constants.HTYPE_GROUP
12462
  REQ_BGL = False
12463

    
12464
  def ExpandNames(self):
12465
    # This raises errors.OpPrereqError on its own:
12466
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12467

    
12468
    self.needed_locks = {
12469
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12470
      }
12471

    
12472
  def CheckPrereq(self):
12473
    """Check prerequisites.
12474

12475
    Ensures requested new name is not yet used.
12476

12477
    """
12478
    try:
12479
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12480
    except errors.OpPrereqError:
12481
      pass
12482
    else:
12483
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12484
                                 " node group (UUID: %s)" %
12485
                                 (self.op.new_name, new_name_uuid),
12486
                                 errors.ECODE_EXISTS)
12487

    
12488
  def BuildHooksEnv(self):
12489
    """Build hooks env.
12490

12491
    """
12492
    return {
12493
      "OLD_NAME": self.op.group_name,
12494
      "NEW_NAME": self.op.new_name,
12495
      }
12496

    
12497
  def BuildHooksNodes(self):
12498
    """Build hooks nodes.
12499

12500
    """
12501
    mn = self.cfg.GetMasterNode()
12502

    
12503
    all_nodes = self.cfg.GetAllNodesInfo()
12504
    all_nodes.pop(mn, None)
12505

    
12506
    run_nodes = [mn]
12507
    run_nodes.extend(node.name for node in all_nodes.values()
12508
                     if node.group == self.group_uuid)
12509

    
12510
    return (run_nodes, run_nodes)
12511

    
12512
  def Exec(self, feedback_fn):
12513
    """Rename the node group.
12514

12515
    """
12516
    group = self.cfg.GetNodeGroup(self.group_uuid)
12517

    
12518
    if group is None:
12519
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12520
                               (self.op.group_name, self.group_uuid))
12521

    
12522
    group.name = self.op.new_name
12523
    self.cfg.Update(group, feedback_fn)
12524

    
12525
    return self.op.new_name
12526

    
12527

    
12528
class LUGroupEvacuate(LogicalUnit):
12529
  HPATH = "group-evacuate"
12530
  HTYPE = constants.HTYPE_GROUP
12531
  REQ_BGL = False
12532

    
12533
  def ExpandNames(self):
12534
    # This raises errors.OpPrereqError on its own:
12535
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12536

    
12537
    if self.op.target_groups:
12538
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12539
                                  self.op.target_groups)
12540
    else:
12541
      self.req_target_uuids = []
12542

    
12543
    if self.group_uuid in self.req_target_uuids:
12544
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12545
                                 " as a target group (targets are %s)" %
12546
                                 (self.group_uuid,
12547
                                  utils.CommaJoin(self.req_target_uuids)),
12548
                                 errors.ECODE_INVAL)
12549

    
12550
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12551

    
12552
    self.share_locks = _ShareAll()
12553
    self.needed_locks = {
12554
      locking.LEVEL_INSTANCE: [],
12555
      locking.LEVEL_NODEGROUP: [],
12556
      locking.LEVEL_NODE: [],
12557
      }
12558

    
12559
  def DeclareLocks(self, level):
12560
    if level == locking.LEVEL_INSTANCE:
12561
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12562

    
12563
      # Lock instances optimistically, needs verification once node and group
12564
      # locks have been acquired
12565
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12566
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12567

    
12568
    elif level == locking.LEVEL_NODEGROUP:
12569
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12570

    
12571
      if self.req_target_uuids:
12572
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12573

    
12574
        # Lock all groups used by instances optimistically; this requires going
12575
        # via the node before it's locked, requiring verification later on
12576
        lock_groups.update(group_uuid
12577
                           for instance_name in
12578
                             self.owned_locks(locking.LEVEL_INSTANCE)
12579
                           for group_uuid in
12580
                             self.cfg.GetInstanceNodeGroups(instance_name))
12581
      else:
12582
        # No target groups, need to lock all of them
12583
        lock_groups = locking.ALL_SET
12584

    
12585
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12586

    
12587
    elif level == locking.LEVEL_NODE:
12588
      # This will only lock the nodes in the group to be evacuated which
12589
      # contain actual instances
12590
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12591
      self._LockInstancesNodes()
12592

    
12593
      # Lock all nodes in group to be evacuated and target groups
12594
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12595
      assert self.group_uuid in owned_groups
12596
      member_nodes = [node_name
12597
                      for group in owned_groups
12598
                      for node_name in self.cfg.GetNodeGroup(group).members]
12599
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12600

    
12601
  def CheckPrereq(self):
12602
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12603
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12604
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12605

    
12606
    assert owned_groups.issuperset(self.req_target_uuids)
12607
    assert self.group_uuid in owned_groups
12608

    
12609
    # Check if locked instances are still correct
12610
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12611

    
12612
    # Get instance information
12613
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12614

    
12615
    # Check if node groups for locked instances are still correct
12616
    for instance_name in owned_instances:
12617
      inst = self.instances[instance_name]
12618
      assert owned_nodes.issuperset(inst.all_nodes), \
12619
        "Instance %s's nodes changed while we kept the lock" % instance_name
12620

    
12621
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12622
                                             owned_groups)
12623

    
12624
      assert self.group_uuid in inst_groups, \
12625
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12626

    
12627
    if self.req_target_uuids:
12628
      # User requested specific target groups
12629
      self.target_uuids = self.req_target_uuids
12630
    else:
12631
      # All groups except the one to be evacuated are potential targets
12632
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12633
                           if group_uuid != self.group_uuid]
12634

    
12635
      if not self.target_uuids:
12636
        raise errors.OpPrereqError("There are no possible target groups",
12637
                                   errors.ECODE_INVAL)
12638

    
12639
  def BuildHooksEnv(self):
12640
    """Build hooks env.
12641

12642
    """
12643
    return {
12644
      "GROUP_NAME": self.op.group_name,
12645
      "TARGET_GROUPS": " ".join(self.target_uuids),
12646
      }
12647

    
12648
  def BuildHooksNodes(self):
12649
    """Build hooks nodes.
12650

12651
    """
12652
    mn = self.cfg.GetMasterNode()
12653

    
12654
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12655

    
12656
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12657

    
12658
    return (run_nodes, run_nodes)
12659

    
12660
  def Exec(self, feedback_fn):
12661
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12662

    
12663
    assert self.group_uuid not in self.target_uuids
12664

    
12665
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12666
                     instances=instances, target_groups=self.target_uuids)
12667

    
12668
    ial.Run(self.op.iallocator)
12669

    
12670
    if not ial.success:
12671
      raise errors.OpPrereqError("Can't compute group evacuation using"
12672
                                 " iallocator '%s': %s" %
12673
                                 (self.op.iallocator, ial.info),
12674
                                 errors.ECODE_NORES)
12675

    
12676
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12677

    
12678
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12679
                 len(jobs), self.op.group_name)
12680

    
12681
    return ResultWithJobs(jobs)
12682

    
12683

    
12684
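# Illustrative sketch, kept as a comment and not executed: for group
# evacuation the IAllocator request built later in this module (see
# _AddChangeGroup) has roughly this shape, and the external allocator answers
# with a list of jobs that LUGroupEvacuate returns via ResultWithJobs:
#
#   {
#     "type": constants.IALLOCATOR_MODE_CHG_GROUP,
#     "instances": ["inst1.example.com", "inst2.example.com"],
#     "target_groups": ["uuid-of-target-group"],
#   }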
class TagsLU(NoHooksLU): # pylint: disable=W0223
12685
  """Generic tags LU.
12686

12687
  This is an abstract class which is the parent of all the other tags LUs.
12688

12689
  """
12690
  def ExpandNames(self):
12691
    self.group_uuid = None
12692
    self.needed_locks = {}
12693
    if self.op.kind == constants.TAG_NODE:
12694
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12695
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12696
    elif self.op.kind == constants.TAG_INSTANCE:
12697
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12698
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12699
    elif self.op.kind == constants.TAG_NODEGROUP:
12700
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12701

    
12702
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12703
    # not possible to acquire the BGL based on opcode parameters)
12704

    
12705
  def CheckPrereq(self):
12706
    """Check prerequisites.
12707

12708
    """
12709
    if self.op.kind == constants.TAG_CLUSTER:
12710
      self.target = self.cfg.GetClusterInfo()
12711
    elif self.op.kind == constants.TAG_NODE:
12712
      self.target = self.cfg.GetNodeInfo(self.op.name)
12713
    elif self.op.kind == constants.TAG_INSTANCE:
12714
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12715
    elif self.op.kind == constants.TAG_NODEGROUP:
12716
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12717
    else:
12718
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12719
                                 str(self.op.kind), errors.ECODE_INVAL)
12720

    
12721

    
12722
class LUTagsGet(TagsLU):
12723
  """Returns the tags of a given object.
12724

12725
  """
12726
  REQ_BGL = False
12727

    
12728
  def ExpandNames(self):
12729
    TagsLU.ExpandNames(self)
12730

    
12731
    # Share locks as this is only a read operation
12732
    self.share_locks = _ShareAll()
12733

    
12734
  def Exec(self, feedback_fn):
12735
    """Returns the tag list.
12736

12737
    """
12738
    return list(self.target.GetTags())
12739

    
12740

    
12741
class LUTagsSearch(NoHooksLU):
12742
  """Searches the tags for a given pattern.
12743

12744
  """
12745
  REQ_BGL = False
12746

    
12747
  def ExpandNames(self):
12748
    self.needed_locks = {}
12749

    
12750
  def CheckPrereq(self):
12751
    """Check prerequisites.
12752

12753
    This checks the pattern passed for validity by compiling it.
12754

12755
    """
12756
    try:
12757
      self.re = re.compile(self.op.pattern)
12758
    except re.error, err:
12759
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12760
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12761

    
12762
  def Exec(self, feedback_fn):
12763
    """Returns the tag list.
12764

12765
    """
12766
    cfg = self.cfg
12767
    tgts = [("/cluster", cfg.GetClusterInfo())]
12768
    ilist = cfg.GetAllInstancesInfo().values()
12769
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12770
    nlist = cfg.GetAllNodesInfo().values()
12771
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12772
    tgts.extend(("/nodegroup/%s" % n.name, n)
12773
                for n in cfg.GetAllNodeGroupsInfo().values())
12774
    results = []
12775
    for path, target in tgts:
12776
      for tag in target.GetTags():
12777
        if self.re.search(tag):
12778
          results.append((path, tag))
12779
    return results
12780

    
12781

    
12782
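# Illustrative sketch, kept as a comment and not executed: LUTagsSearch above
# returns (path, tag) pairs for every tag matching the pattern, e.g. a search
# for "^db" might yield something like
#
#   [("/cluster", "dbfarm"),
#    ("/instances/inst1.example.com", "db-primary"),
#    ("/nodegroup/default", "dbrack")]
#
# The tag values are invented for the example; the path prefixes (/cluster,
# /instances/<name>, /nodes/<name>, /nodegroup/<name>) come from Exec above.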
class LUTagsSet(TagsLU):
12783
  """Sets a tag on a given object.
12784

12785
  """
12786
  REQ_BGL = False
12787

    
12788
  def CheckPrereq(self):
12789
    """Check prerequisites.
12790

12791
    This checks the type and length of the tag name and value.
12792

12793
    """
12794
    TagsLU.CheckPrereq(self)
12795
    for tag in self.op.tags:
12796
      objects.TaggableObject.ValidateTag(tag)
12797

    
12798
  def Exec(self, feedback_fn):
12799
    """Sets the tag.
12800

12801
    """
12802
    try:
12803
      for tag in self.op.tags:
12804
        self.target.AddTag(tag)
12805
    except errors.TagError, err:
12806
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
12807
    self.cfg.Update(self.target, feedback_fn)
12808

    
12809

    
12810
class LUTagsDel(TagsLU):
12811
  """Delete a list of tags from a given object.
12812

12813
  """
12814
  REQ_BGL = False
12815

    
12816
  def CheckPrereq(self):
12817
    """Check prerequisites.
12818

12819
    This checks that we have the given tag.
12820

12821
    """
12822
    TagsLU.CheckPrereq(self)
12823
    for tag in self.op.tags:
12824
      objects.TaggableObject.ValidateTag(tag)
12825
    del_tags = frozenset(self.op.tags)
12826
    cur_tags = self.target.GetTags()
12827

    
12828
    diff_tags = del_tags - cur_tags
12829
    if diff_tags:
12830
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
12831
      raise errors.OpPrereqError("Tag(s) %s not found" %
12832
                                 (utils.CommaJoin(diff_names), ),
12833
                                 errors.ECODE_NOENT)
12834

    
12835
  def Exec(self, feedback_fn):
12836
    """Remove the tag from the object.
12837

12838
    """
12839
    for tag in self.op.tags:
12840
      self.target.RemoveTag(tag)
12841
    self.cfg.Update(self.target, feedback_fn)
12842

    
12843

    
12844
class LUTestDelay(NoHooksLU):
12845
  """Sleep for a specified amount of time.
12846

12847
  This LU sleeps on the master and/or nodes for a specified amount of
12848
  time.
12849

12850
  """
12851
  REQ_BGL = False
12852

    
12853
  def ExpandNames(self):
12854
    """Expand names and set required locks.
12855

12856
    This expands the node list, if any.
12857

12858
    """
12859
    self.needed_locks = {}
12860
    if self.op.on_nodes:
12861
      # _GetWantedNodes can be used here, but is not always appropriate to use
12862
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12863
      # more information.
12864
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12865
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12866

    
12867
  def _TestDelay(self):
12868
    """Do the actual sleep.
12869

12870
    """
12871
    if self.op.on_master:
12872
      if not utils.TestDelay(self.op.duration):
12873
        raise errors.OpExecError("Error during master delay test")
12874
    if self.op.on_nodes:
12875
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12876
      for node, node_result in result.items():
12877
        node_result.Raise("Failure during rpc call to node %s" % node)
12878

    
12879
  def Exec(self, feedback_fn):
12880
    """Execute the test delay opcode, with the wanted repetitions.
12881

12882
    """
12883
    if self.op.repeat == 0:
12884
      self._TestDelay()
12885
    else:
12886
      top_value = self.op.repeat - 1
12887
      for i in range(self.op.repeat):
12888
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12889
        self._TestDelay()
12890

    
12891

    
12892
class LUTestJqueue(NoHooksLU):
12893
  """Utility LU to test some aspects of the job queue.
12894

12895
  """
12896
  REQ_BGL = False
12897

    
12898
  # Must be lower than default timeout for WaitForJobChange to see whether it
12899
  # notices changed jobs
12900
  _CLIENT_CONNECT_TIMEOUT = 20.0
12901
  _CLIENT_CONFIRM_TIMEOUT = 60.0
12902

    
12903
  @classmethod
12904
  def _NotifyUsingSocket(cls, cb, errcls):
12905
    """Opens a Unix socket and waits for another program to connect.
12906

12907
    @type cb: callable
12908
    @param cb: Callback to send socket name to client
12909
    @type errcls: class
12910
    @param errcls: Exception class to use for errors
12911

12912
    """
12913
    # Using a temporary directory as there's no easy way to create temporary
12914
    # sockets without writing a custom loop around tempfile.mktemp and
12915
    # socket.bind
12916
    tmpdir = tempfile.mkdtemp()
12917
    try:
12918
      tmpsock = utils.PathJoin(tmpdir, "sock")
12919

    
12920
      logging.debug("Creating temporary socket at %s", tmpsock)
12921
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12922
      try:
12923
        sock.bind(tmpsock)
12924
        sock.listen(1)
12925

    
12926
        # Send details to client
12927
        cb(tmpsock)
12928

    
12929
        # Wait for client to connect before continuing
12930
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12931
        try:
12932
          (conn, _) = sock.accept()
12933
        except socket.error, err:
12934
          raise errcls("Client didn't connect in time (%s)" % err)
12935
      finally:
12936
        sock.close()
12937
    finally:
12938
      # Remove as soon as client is connected
12939
      shutil.rmtree(tmpdir)
12940

    
12941
    # Wait for client to close
12942
    try:
12943
      try:
12944
        # pylint: disable=E1101
12945
        # Instance of '_socketobject' has no ... member
12946
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12947
        conn.recv(1)
12948
      except socket.error, err:
12949
        raise errcls("Client failed to confirm notification (%s)" % err)
12950
    finally:
12951
      conn.close()
12952

    
12953
  def _SendNotification(self, test, arg, sockname):
12954
    """Sends a notification to the client.
12955

12956
    @type test: string
12957
    @param test: Test name
12958
    @param arg: Test argument (depends on test)
12959
    @type sockname: string
12960
    @param sockname: Socket path
12961

12962
    """
12963
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12964

    
12965
  def _Notify(self, prereq, test, arg):
12966
    """Notifies the client of a test.
12967

12968
    @type prereq: bool
12969
    @param prereq: Whether this is a prereq-phase test
12970
    @type test: string
12971
    @param test: Test name
12972
    @param arg: Test argument (depends on test)
12973

12974
    """
12975
    if prereq:
12976
      errcls = errors.OpPrereqError
12977
    else:
12978
      errcls = errors.OpExecError
12979

    
12980
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12981
                                                  test, arg),
12982
                                   errcls)
12983

    
12984
  def CheckArguments(self):
12985
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12986
    self.expandnames_calls = 0
12987

    
12988
  def ExpandNames(self):
12989
    checkargs_calls = getattr(self, "checkargs_calls", 0)
12990
    if checkargs_calls < 1:
12991
      raise errors.ProgrammerError("CheckArguments was not called")
12992

    
12993
    self.expandnames_calls += 1
12994

    
12995
    if self.op.notify_waitlock:
12996
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
12997

    
12998
    self.LogInfo("Expanding names")
12999

    
13000
    # Get lock on master node (just to get a lock, not for a particular reason)
13001
    self.needed_locks = {
13002
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13003
      }
13004

    
13005
  def Exec(self, feedback_fn):
13006
    if self.expandnames_calls < 1:
13007
      raise errors.ProgrammerError("ExpandNames was not called")
13008

    
13009
    if self.op.notify_exec:
13010
      self._Notify(False, constants.JQT_EXEC, None)
13011

    
13012
    self.LogInfo("Executing")
13013

    
13014
    if self.op.log_messages:
13015
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13016
      for idx, msg in enumerate(self.op.log_messages):
13017
        self.LogInfo("Sending log message %s", idx + 1)
13018
        feedback_fn(constants.JQT_MSGPREFIX + msg)
13019
        # Report how many test messages have been sent
13020
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13021

    
13022
    if self.op.fail:
13023
      raise errors.OpExecError("Opcode failure was requested")
13024

    
13025
    return True
13026

    
13027

    
13028
class IAllocator(object):
13029
  """IAllocator framework.
13030

13031
  An IAllocator instance has the following sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in_data, in_text, out_data, out_text) that
      represent the input to the external script (in data structure and
      text format) and the output from it, again in both formats
    - the result variables from the script (success, info, result) for
      easy usage
13039

13040
  """
13041
  # pylint: disable=R0902
13042
  # lots of instance attributes
13043

    
13044
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
13045
    self.cfg = cfg
13046
    self.rpc = rpc_runner
13047
    # init buffer variables
13048
    self.in_text = self.out_text = self.in_data = self.out_data = None
13049
    # init all input fields so that pylint is happy
13050
    self.mode = mode
13051
    self.memory = self.disks = self.disk_template = None
13052
    self.os = self.tags = self.nics = self.vcpus = None
13053
    self.hypervisor = None
13054
    self.relocate_from = None
13055
    self.name = None
13056
    self.instances = None
13057
    self.evac_mode = None
13058
    self.target_groups = []
13059
    # computed fields
13060
    self.required_nodes = None
13061
    # init result fields
13062
    self.success = self.info = self.result = None
13063

    
13064
    try:
13065
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13066
    except KeyError:
13067
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13068
                                   " IAllocator" % self.mode)
13069

    
13070
    keyset = [n for (n, _) in keydata]
13071

    
13072
    for key in kwargs:
13073
      if key not in keyset:
13074
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
13075
                                     " IAllocator" % key)
13076
      setattr(self, key, kwargs[key])
13077

    
13078
    for key in keyset:
13079
      if key not in kwargs:
13080
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13081
                                     " IAllocator" % key)
13082
    self._BuildInputData(compat.partial(fn, self), keydata)
13083

    
13084
  def _ComputeClusterData(self):
13085
    """Compute the generic allocator input data.
13086

13087
    This is the data that is independent of the actual operation.
13088

13089
    """
13090
    cfg = self.cfg
13091
    cluster_info = cfg.GetClusterInfo()
13092
    # cluster data
13093
    data = {
13094
      "version": constants.IALLOCATOR_VERSION,
13095
      "cluster_name": cfg.GetClusterName(),
13096
      "cluster_tags": list(cluster_info.GetTags()),
13097
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13098
      # we don't have job IDs
13099
      }
13100
    ninfo = cfg.GetAllNodesInfo()
13101
    iinfo = cfg.GetAllInstancesInfo().values()
13102
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13103

    
13104
    # node data
13105
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
13106

    
13107
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13108
      hypervisor_name = self.hypervisor
13109
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13110
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13111
    else:
13112
      hypervisor_name = cluster_info.enabled_hypervisors[0]
13113

    
13114
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13115
                                        hypervisor_name)
13116
    node_iinfo = \
13117
      self.rpc.call_all_instances_info(node_list,
13118
                                       cluster_info.enabled_hypervisors)
13119

    
13120
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13121

    
13122
    config_ndata = self._ComputeBasicNodeData(ninfo)
13123
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13124
                                                 i_list, config_ndata)
13125
    assert len(data["nodes"]) == len(ninfo), \
13126
        "Incomplete node data computed"
13127

    
13128
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13129

    
13130
    self.in_data = data
13131

    
13132
  @staticmethod
13133
  def _ComputeNodeGroupData(cfg):
13134
    """Compute node groups data.
13135

13136
    """
13137
    ng = dict((guuid, {
13138
      "name": gdata.name,
13139
      "alloc_policy": gdata.alloc_policy,
13140
      })
13141
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13142

    
13143
    return ng
13144

    
13145
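  # Illustrative sketch, kept as a comment and not executed: the node group
  # data computed above maps group UUID to a small dict, roughly
  #
  #   {"f4e06e0f-...": {"name": "default",
  #                     "alloc_policy": constants.ALLOC_POLICY_PREFERRED}}
  #
  # (the UUID is a placeholder); only the name and allocation policy are
  # exported to the allocator here.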
  @staticmethod
13146
  def _ComputeBasicNodeData(node_cfg):
13147
    """Compute global node data.
13148

13149
    @rtype: dict
13150
    @returns: a dict of name: (node dict, node config)
13151

13152
    """
13153
    # fill in static (config-based) values
13154
    node_results = dict((ninfo.name, {
13155
      "tags": list(ninfo.GetTags()),
13156
      "primary_ip": ninfo.primary_ip,
13157
      "secondary_ip": ninfo.secondary_ip,
13158
      "offline": ninfo.offline,
13159
      "drained": ninfo.drained,
13160
      "master_candidate": ninfo.master_candidate,
13161
      "group": ninfo.group,
13162
      "master_capable": ninfo.master_capable,
13163
      "vm_capable": ninfo.vm_capable,
13164
      })
13165
      for ninfo in node_cfg.values())
13166

    
13167
    return node_results
13168

    
13169
  @staticmethod
13170
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13171
                              node_results):
13172
    """Compute global node data.
13173

13174
    @param node_results: the basic node structures as filled from the config
13175

13176
    """
13177
    # make a copy of the current dict
13178
    node_results = dict(node_results)
13179
    for nname, nresult in node_data.items():
13180
      assert nname in node_results, "Missing basic data for node %s" % nname
13181
      ninfo = node_cfg[nname]
13182

    
13183
      if not (ninfo.offline or ninfo.drained):
13184
        nresult.Raise("Can't get data for node %s" % nname)
13185
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13186
                                nname)
13187
        remote_info = nresult.payload
13188

    
13189
        for attr in ["memory_total", "memory_free", "memory_dom0",
13190
                     "vg_size", "vg_free", "cpu_total"]:
13191
          if attr not in remote_info:
13192
            raise errors.OpExecError("Node '%s' didn't return attribute"
13193
                                     " '%s'" % (nname, attr))
13194
          if not isinstance(remote_info[attr], int):
13195
            raise errors.OpExecError("Node '%s' returned invalid value"
13196
                                     " for '%s': %s" %
13197
                                     (nname, attr, remote_info[attr]))
13198
        # compute memory used by primary instances
13199
        i_p_mem = i_p_up_mem = 0
13200
        for iinfo, beinfo in i_list:
13201
          if iinfo.primary_node == nname:
13202
            i_p_mem += beinfo[constants.BE_MEMORY]
13203
            if iinfo.name not in node_iinfo[nname].payload:
13204
              i_used_mem = 0
13205
            else:
13206
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13207
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13208
            remote_info["memory_free"] -= max(0, i_mem_diff)
13209

    
13210
            if iinfo.admin_up:
13211
              i_p_up_mem += beinfo[constants.BE_MEMORY]
13212

    
13213
        # compute memory used by instances
13214
        pnr_dyn = {
13215
          "total_memory": remote_info["memory_total"],
13216
          "reserved_memory": remote_info["memory_dom0"],
13217
          "free_memory": remote_info["memory_free"],
13218
          "total_disk": remote_info["vg_size"],
13219
          "free_disk": remote_info["vg_free"],
13220
          "total_cpus": remote_info["cpu_total"],
13221
          "i_pri_memory": i_p_mem,
13222
          "i_pri_up_memory": i_p_up_mem,
13223
          }
13224
        pnr_dyn.update(node_results[nname])
13225
        node_results[nname] = pnr_dyn
13226

    
13227
    return node_results
13228

    
13229
  @staticmethod
13230
  def _ComputeInstanceData(cluster_info, i_list):
13231
    """Compute global instance data.
13232

13233
    """
13234
    instance_data = {}
13235
    for iinfo, beinfo in i_list:
13236
      nic_data = []
13237
      for nic in iinfo.nics:
13238
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13239
        nic_dict = {
13240
          "mac": nic.mac,
13241
          "ip": nic.ip,
13242
          "mode": filled_params[constants.NIC_MODE],
13243
          "link": filled_params[constants.NIC_LINK],
13244
          }
13245
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13246
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13247
        nic_data.append(nic_dict)
13248
      pir = {
13249
        "tags": list(iinfo.GetTags()),
13250
        "admin_up": iinfo.admin_up,
13251
        "vcpus": beinfo[constants.BE_VCPUS],
13252
        "memory": beinfo[constants.BE_MEMORY],
13253
        "os": iinfo.os,
13254
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13255
        "nics": nic_data,
13256
        "disks": [{constants.IDISK_SIZE: dsk.size,
13257
                   constants.IDISK_MODE: dsk.mode}
13258
                  for dsk in iinfo.disks],
13259
        "disk_template": iinfo.disk_template,
13260
        "hypervisor": iinfo.hypervisor,
13261
        }
13262
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13263
                                                 pir["disks"])
13264
      instance_data[iinfo.name] = pir
13265

    
13266
    return instance_data
13267

    
13268
  def _AddNewInstance(self):
13269
    """Add new instance data to allocator structure.
13270

13271
    This in combination with _ComputeClusterData will create the
13272
    correct structure needed as input for the allocator.
13273

13274
    The checks for the completeness of the opcode must have already been
13275
    done.
13276

13277
    """
13278
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13279

    
13280
    if self.disk_template in constants.DTS_INT_MIRROR:
13281
      self.required_nodes = 2
13282
    else:
13283
      self.required_nodes = 1
13284

    
13285
    request = {
13286
      "name": self.name,
13287
      "disk_template": self.disk_template,
13288
      "tags": self.tags,
13289
      "os": self.os,
13290
      "vcpus": self.vcpus,
13291
      "memory": self.memory,
13292
      "disks": self.disks,
13293
      "disk_space_total": disk_space,
13294
      "nics": self.nics,
13295
      "required_nodes": self.required_nodes,
13296
      "hypervisor": self.hypervisor,
13297
      }
13298

    
13299
    return request
13300

    
13301
  def _AddRelocateInstance(self):
13302
    """Add relocate instance data to allocator structure.
13303

13304
    This in combination with _ComputeClusterData will create the
13305
    correct structure needed as input for the allocator.
13306

13307
    The checks for the completeness of the opcode must have already been
13308
    done.
13309

13310
    """
13311
    instance = self.cfg.GetInstanceInfo(self.name)
13312
    if instance is None:
13313
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13314
                                   " IAllocator" % self.name)
13315

    
13316
    if instance.disk_template not in constants.DTS_MIRRORED:
13317
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13318
                                 errors.ECODE_INVAL)
13319

    
13320
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13321
        len(instance.secondary_nodes) != 1:
13322
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13323
                                 errors.ECODE_STATE)
13324

    
13325
    self.required_nodes = 1
13326
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13327
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13328

    
13329
    request = {
13330
      "name": self.name,
13331
      "disk_space_total": disk_space,
13332
      "required_nodes": self.required_nodes,
13333
      "relocate_from": self.relocate_from,
13334
      }
13335
    return request
13336

    
13337
  def _AddNodeEvacuate(self):
13338
    """Get data for node-evacuate requests.
13339

13340
    """
13341
    return {
13342
      "instances": self.instances,
13343
      "evac_mode": self.evac_mode,
13344
      }
13345

    
13346
  def _AddChangeGroup(self):
13347
    """Get data for node-evacuate requests.
13348

13349
    """
13350
    return {
13351
      "instances": self.instances,
13352
      "target_groups": self.target_groups,
13353
      }
13354

    
13355
  def _BuildInputData(self, fn, keydata):
13356
    """Build input data structures.
13357

13358
    """
13359
    self._ComputeClusterData()
13360

    
13361
    request = fn()
13362
    request["type"] = self.mode
13363
    for keyname, keytype in keydata:
13364
      if keyname not in request:
13365
        raise errors.ProgrammerError("Request parameter %s is missing" %
13366
                                     keyname)
13367
      val = request[keyname]
13368
      if not keytype(val):
13369
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13370
                                     " validation, value %s, expected"
13371
                                     " type %s" % (keyname, val, keytype))
13372
    self.in_data["request"] = request
13373

    
13374
    self.in_text = serializer.Dump(self.in_data)
13375

    
13376
  _STRING_LIST = ht.TListOf(ht.TString)
13377
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13378
     # pylint: disable=E1101
13379
     # Class '...' has no 'OP_ID' member
13380
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13381
                          opcodes.OpInstanceMigrate.OP_ID,
13382
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13383
     })))
13384

    
13385
  _NEVAC_MOVED = \
13386
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13387
                       ht.TItems([ht.TNonEmptyString,
13388
                                  ht.TNonEmptyString,
13389
                                  ht.TListOf(ht.TNonEmptyString),
13390
                                 ])))
13391
  _NEVAC_FAILED = \
13392
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13393
                       ht.TItems([ht.TNonEmptyString,
13394
                                  ht.TMaybeString,
13395
                                 ])))
13396
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13397
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13398

    
13399
  _MODE_DATA = {
13400
    constants.IALLOCATOR_MODE_ALLOC:
13401
      (_AddNewInstance,
13402
       [
13403
        ("name", ht.TString),
13404
        ("memory", ht.TInt),
13405
        ("disks", ht.TListOf(ht.TDict)),
13406
        ("disk_template", ht.TString),
13407
        ("os", ht.TString),
13408
        ("tags", _STRING_LIST),
13409
        ("nics", ht.TListOf(ht.TDict)),
13410
        ("vcpus", ht.TInt),
13411
        ("hypervisor", ht.TString),
13412
        ], ht.TList),
13413
    constants.IALLOCATOR_MODE_RELOC:
13414
      (_AddRelocateInstance,
13415
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13416
       ht.TList),
13417
    constants.IALLOCATOR_MODE_NODE_EVAC:
13418
      (_AddNodeEvacuate, [
13419
        ("instances", _STRING_LIST),
13420
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13421
        ], _NEVAC_RESULT),
13422
    constants.IALLOCATOR_MODE_CHG_GROUP:
13423
      (_AddChangeGroup, [
13424
        ("instances", _STRING_LIST),
13425
        ("target_groups", _STRING_LIST),
13426
        ], _NEVAC_RESULT),
13427
    }
13428

    
13429
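  # Note on the table above, added as a comment: _MODE_DATA maps each
  # allocator mode to (request-builder method, [(key, type check)], result
  # check). _BuildInputData calls the builder, then verifies that every
  # listed key is present in the request and passes its ht check (raising
  # ProgrammerError otherwise), and _ValidateResult later applies the result
  # check to the allocator's answer.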
  def Run(self, name, validate=True, call_fn=None):
13430
    """Run an instance allocator and return the results.
13431

13432
    """
13433
    if call_fn is None:
13434
      call_fn = self.rpc.call_iallocator_runner
13435

    
13436
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13437
    result.Raise("Failure while running the iallocator script")
13438

    
13439
    self.out_text = result.payload
13440
    if validate:
13441
      self._ValidateResult()
13442

    
13443
  def _ValidateResult(self):
13444
    """Process the allocator results.
13445

13446
    This will process the results and, if successful, save them in
    self.out_data and the other result attributes.
13448

13449
    """
13450
    try:
13451
      rdict = serializer.Load(self.out_text)
13452
    except Exception, err:
13453
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13454

    
13455
    if not isinstance(rdict, dict):
13456
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
13457

    
13458
    # TODO: remove backwards compatibility in later versions
13459
    if "nodes" in rdict and "result" not in rdict:
13460
      rdict["result"] = rdict["nodes"]
13461
      del rdict["nodes"]
13462

    
13463
    for key in "success", "info", "result":
13464
      if key not in rdict:
13465
        raise errors.OpExecError("Can't parse iallocator results:"
13466
                                 " missing key '%s'" % key)
13467
      setattr(self, key, rdict[key])
13468

    
13469
    if not self._result_check(self.result):
13470
      raise errors.OpExecError("Iallocator returned invalid result,"
13471
                               " expected %s, got %s" %
13472
                               (self._result_check, self.result),
13473
                               errors.ECODE_INVAL)
13474

    
13475
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
13476
      assert self.relocate_from is not None
13477
      assert self.required_nodes == 1
13478

    
13479
      node2group = dict((name, ndata["group"])
13480
                        for (name, ndata) in self.in_data["nodes"].items())
13481

    
13482
      fn = compat.partial(self._NodesToGroups, node2group,
13483
                          self.in_data["nodegroups"])
13484

    
13485
      instance = self.cfg.GetInstanceInfo(self.name)
13486
      request_groups = fn(self.relocate_from + [instance.primary_node])
13487
      result_groups = fn(rdict["result"] + [instance.primary_node])
13488

    
13489
      if self.success and not set(result_groups).issubset(request_groups):
13490
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13491
                                 " differ from original groups (%s)" %
13492
                                 (utils.CommaJoin(result_groups),
13493
                                  utils.CommaJoin(request_groups)))
13494

    
13495
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13496
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13497

    
13498
    self.out_data = rdict
13499

    
13500
  @staticmethod
13501
  def _NodesToGroups(node2group, groups, nodes):
13502
    """Returns a list of unique group names for a list of nodes.
13503

13504
    @type node2group: dict
13505
    @param node2group: Map from node name to group UUID
13506
    @type groups: dict
13507
    @param groups: Group information
13508
    @type nodes: list
13509
    @param nodes: Node names
13510

13511
    """
13512
    result = set()
13513

    
13514
    for node in nodes:
13515
      try:
13516
        group_uuid = node2group[node]
13517
      except KeyError:
13518
        # Ignore unknown node
13519
        pass
13520
      else:
13521
        try:
13522
          group = groups[group_uuid]
13523
        except KeyError:
13524
          # Can't find group, let's use UUID
13525
          group_name = group_uuid
13526
        else:
13527
          group_name = group["name"]
13528

    
13529
        result.add(group_name)
13530

    
13531
    return sorted(result)
13532

    
13533

    
13534
class LUTestAllocator(NoHooksLU):
13535
  """Run allocator tests.
13536

13537
  This LU runs the allocator tests.
13538

13539
  """
13540
  def CheckPrereq(self):
13541
    """Check prerequisites.
13542

13543
    This checks the opcode parameters depending on the test direction and mode.
13544

13545
    """
13546
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13547
      for attr in ["memory", "disks", "disk_template",
13548
                   "os", "tags", "nics", "vcpus"]:
13549
        if not hasattr(self.op, attr):
13550
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13551
                                     attr, errors.ECODE_INVAL)
13552
      iname = self.cfg.ExpandInstanceName(self.op.name)
13553
      if iname is not None:
13554
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13555
                                   iname, errors.ECODE_EXISTS)
13556
      if not isinstance(self.op.nics, list):
13557
        raise errors.OpPrereqError("Invalid parameter 'nics'",
13558
                                   errors.ECODE_INVAL)
13559
      if not isinstance(self.op.disks, list):
13560
        raise errors.OpPrereqError("Invalid parameter 'disks'",
13561
                                   errors.ECODE_INVAL)
13562
      for row in self.op.disks:
13563
        if (not isinstance(row, dict) or
13564
            constants.IDISK_SIZE not in row or
13565
            not isinstance(row[constants.IDISK_SIZE], int) or
13566
            constants.IDISK_MODE not in row or
13567
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13568
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
13569
                                     " parameter", errors.ECODE_INVAL)
13570
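      # For illustration (hypothetical values, not prescribed by this module):
      # a minimal valid entry in self.op.disks is a dict such as
      #   {constants.IDISK_SIZE: 1024, constants.IDISK_MODE: mode}
      # where the size is an integer (disk sizes are given in MiB) and mode
      # is one of the values in constants.DISK_ACCESS_SET.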
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)
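
    # With direction "in" this only returns the generated allocator input
    # text, without invoking any iallocator script; with direction "out" it
    # runs the named allocator and returns its raw output (validate=False,
    # so the result is not checked here).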
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
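
# Illustrative usage (hypothetical call, values invented for the example):
# _GetQueryImplementation(constants.QR_INSTANCE) returns the _InstanceQuery
# class registered in _QUERY_IMPL above, while an unknown resource name such
# as "bogus" raises OpPrereqError with errors.ECODE_INVAL.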