#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools
import operator

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht

import ganeti.masterd.instance # pylint: disable-msg=W0611

class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs

class LogicalUnit(object):
89
  """Logical Unit base class.
90

91
  Subclasses must follow these rules:
92
    - implement ExpandNames
93
    - implement CheckPrereq (except when tasklets are used)
94
    - implement Exec (except when tasklets are used)
95
    - implement BuildHooksEnv
96
    - implement BuildHooksNodes
97
    - redefine HPATH and HTYPE
98
    - optionally redefine their run requirements:
99
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
100

101
  Note that all commands require root permissions.
102

103
  @ivar dry_run_result: the value (if any) that will be returned to the caller
104
      in dry-run mode (signalled by opcode dry_run parameter)
105

106
  """
107
  HPATH = None
108
  HTYPE = None
109
  REQ_BGL = True
110

    
111
  def __init__(self, processor, op, context, rpc):
112
    """Constructor for LogicalUnit.
113

114
    This needs to be overridden in derived classes in order to check op
115
    validity.
116

117
    """
118
    self.proc = processor
119
    self.op = op
120
    self.cfg = context.cfg
121
    self.glm = context.glm
122
    self.context = context
123
    self.rpc = rpc
124
    # Dicts used to declare locking needs to mcpu
125
    self.needed_locks = None
126
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
127
    self.add_locks = {}
128
    self.remove_locks = {}
129
    # Used to force good behavior when calling helper functions
130
    self.recalculate_locks = {}
131
    # logging
132
    self.Log = processor.Log # pylint: disable-msg=C0103
133
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
134
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
135
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
136
    # support for dry-run
137
    self.dry_run_result = None
138
    # support for generic debug attribute
139
    if (not hasattr(self.op, "debug_level") or
140
        not isinstance(self.op.debug_level, int)):
141
      self.op.debug_level = 0
142

    
143
    # Tasklets
144
    self.tasklets = None
145

    
146
    # Validate opcode parameters and set defaults
147
    self.op.Validate(True)
148

    
149
    self.CheckArguments()
150

    
  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged, but any LU can override it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")

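
# Editor's illustrative sketch (not part of the original module): a minimal
# LU skeleton following the rules documented in L{LogicalUnit}. ExpandNames
# declares (shared) locks, CheckPrereq validates without changing state and
# Exec does the actual work. The class and its behaviour are hypothetical
# and it is not wired to any opcode.

class _LUExampleListNodes(NoHooksLU):
  """Hypothetical LU returning the sorted list of node names.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Acquire all node locks in shared mode; see L{LogicalUnit.ExpandNames}
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    # Nothing to verify for this example
    pass

  def Exec(self, feedback_fn):
    return utils.NiceSort(self.cfg.GetNodeList())
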
class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError

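
# Editor's illustrative sketch (not part of the original module): a tiny
# Tasklet subclass. Locking stays in the owning LU; the tasklet only checks
# its prerequisites and does its work. Name and behaviour are hypothetical.

class _ExampleNoopTasklet(Tasklet):
  """Hypothetical tasklet that only reports the instance it was given.

  """
  def __init__(self, lu, instance_name):
    Tasklet.__init__(self, lu)
    self.instance_name = instance_name

  def CheckPrereq(self):
    # Canonicalize the name, as real tasklets do
    self.instance_name = _ExpandInstanceName(self.cfg, self.instance_name)

  def Exec(self, feedback_fn):
    feedback_fn("Would operate on instance %s" % self.instance_name)
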
class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.glm.list_owned(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted

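
# Editor's illustrative sketch (not part of the original module): how an LU
# typically canonicalizes user-supplied names with the helpers above. The
# wrapper is hypothetical and assumes the opcode has "nodes" and "instances"
# slots.

def _ExampleExpandOpTargets(lu):
  """Hypothetical helper expanding lu.op.nodes and lu.op.instances in place."""
  lu.op.nodes = _GetWantedNodes(lu, lu.op.nodes)
  lu.op.instances = _GetWantedInstances(lu, lu.op.instances)
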
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy

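
# Editor's illustrative sketch (not part of the original module): the merge
# semantics of _GetUpdatedParams. With use_default=True a value of
# constants.VALUE_DEFAULT removes the key, everything else overwrites it.

def _ExampleGetUpdatedParams():
  """Hypothetical demonstration of _GetUpdatedParams.

  Returns {"vcpus": 2, "auto_balance": False}: "memory" is reset to its
  default (removed) and "auto_balance" is added.

  """
  old = {"memory": 512, "vcpus": 2}
  update = {"memory": constants.VALUE_DEFAULT, "auto_balance": False}
  return _GetUpdatedParams(old, update)
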
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
         "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  if should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.glm.list_owned(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"

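
# Editor's illustrative sketch (not part of the original module): a typical
# use of _ReleaseLocks, dropping every node lock except the ones an instance
# still needs. The wrapper and its caller are hypothetical.

def _ExampleKeepOnlyInstanceNodes(lu, instance):
  """Hypothetical helper releasing node locks not needed for C{instance}."""
  _ReleaseLocks(lu, locking.LEVEL_NODE,
                keep=[instance.primary_node] + list(instance.secondary_nodes))
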
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)

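
# Editor's illustrative sketch (not part of the original module): consuming
# the (node, volume) -> instance name map built above. The lookup helper is
# hypothetical.

def _ExampleFindVolumeOwner(instances, node_name, volume_name):
  """Hypothetical lookup returning the owning instance name or None."""
  return _MapInstanceDisksToNodes(instances).get((node_name, volume_name))
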
def _RunPostHook(lu, node_name):
701
  """Runs the post-hook for an opcode on a single node.
702

703
  """
704
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
705
  try:
706
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
707
  except:
708
    # pylint: disable-msg=W0702
709
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
710

    
711

    
712
def _CheckOutputFields(static, dynamic, selected):
713
  """Checks whether all selected fields are valid.
714

715
  @type static: L{utils.FieldSet}
716
  @param static: static fields set
717
  @type dynamic: L{utils.FieldSet}
718
  @param dynamic: dynamic fields set
719

720
  """
721
  f = utils.FieldSet()
722
  f.Extend(static)
723
  f.Extend(dynamic)
724

    
725
  delta = f.NonMatching(selected)
726
  if delta:
727
    raise errors.OpPrereqError("Unknown output fields selected: %s"
728
                               % ",".join(delta), errors.ECODE_INVAL)
729

    
730

    
731
def _CheckGlobalHvParams(params):
732
  """Validates that given hypervisor params are not global ones.
733

734
  This will ensure that instances don't get customised versions of
735
  global params.
736

737
  """
738
  used_globals = constants.HVC_GLOBALS.intersection(params)
739
  if used_globals:
740
    msg = ("The following hypervisor parameters are global and cannot"
741
           " be customized at instance level, please modify them at"
742
           " cluster level: %s" % utils.CommaJoin(used_globals))
743
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
744

    
745

    
746
def _CheckNodeOnline(lu, node, msg=None):
747
  """Ensure that a given node is online.
748

749
  @param lu: the LU on behalf of which we make the check
750
  @param node: the node to check
751
  @param msg: if passed, should be a message to replace the default one
752
  @raise errors.OpPrereqError: if the node is offline
753

754
  """
755
  if msg is None:
756
    msg = "Can't use offline node"
757
  if lu.cfg.GetNodeInfo(node).offline:
758
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
759

    
760

    
761
def _CheckNodeNotDrained(lu, node):
762
  """Ensure that a given node is not drained.
763

764
  @param lu: the LU on behalf of which we make the check
765
  @param node: the node to check
766
  @raise errors.OpPrereqError: if the node is drained
767

768
  """
769
  if lu.cfg.GetNodeInfo(node).drained:
770
    raise errors.OpPrereqError("Can't use drained node %s" % node,
771
                               errors.ECODE_STATE)
772

    
773

    
774
def _CheckNodeVmCapable(lu, node):
775
  """Ensure that a given node is vm capable.
776

777
  @param lu: the LU on behalf of which we make the check
778
  @param node: the node to check
779
  @raise errors.OpPrereqError: if the node is not vm capable
780

781
  """
782
  if not lu.cfg.GetNodeInfo(node).vm_capable:
783
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
784
                               errors.ECODE_STATE)
785

    
786

    
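
# Editor's illustrative sketch (not part of the original module): the three
# node checks above are usually combined when validating a target node in
# CheckPrereq; this wrapper itself is hypothetical.

def _ExampleCheckUsableNode(lu, node):
  """Hypothetical helper ensuring a node is online, not drained, vm_capable."""
  _CheckNodeOnline(lu, node)
  _CheckNodeNotDrained(lu, node)
  _CheckNodeVmCapable(lu, node)
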
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance-related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env

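
# Editor's illustrative sketch (not part of the original module): building a
# hook environment by hand. All literal values (names, sizes, the "rw" mode,
# the disk template and hypervisor strings) are made up for the example; real
# callers normally go through _BuildInstanceHookEnvByObject below.

def _ExampleBuildHookEnv():
  """Hypothetical call producing INSTANCE_* variables for one NIC and disk."""
  return _BuildInstanceHookEnv("inst1.example.com", "node1.example.com",
                               ["node2.example.com"], "example-os", True,
                               512, 1,
                               [("198.51.100.10", "aa:00:00:00:00:01",
                                 constants.NIC_MODE_BRIDGED, "xen-br0")],
                               "drbd", [(10240, "rw")],
                               {}, {}, "xen-pvm", ["example"])
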
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance-related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_up,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")

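
# Editor's illustrative sketch (not part of the original module): how an LU
# normally defers to the cluster-wide default iallocator. The slot names
# "iallocator" and "pnode" are examples and depend on the opcode in use.

def _ExampleResolveAllocator(lu):
  """Hypothetical CheckArguments-style call filling the iallocator slot."""
  _CheckIAllocatorOrNode(lu, "iallocator", "pnode")
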
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

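
# Editor's illustrative sketch (not part of the original module): the calling
# convention of the _VerifyErrors mix-in. "verify_lu" would be one of the
# verify LUs below; the node name and condition are made up.

def _ExampleReportNodeRpcFailure(verify_lu, node_name, rpc_failed):
  """Hypothetical helper flagging a failed node RPC via _ErrorIf."""
  # pylint: disable-msg=W0212
  verify_lu._ErrorIf(rpc_failed, verify_lu.ENODERPC, node_name,
                     "node returned invalid RPC data")
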
1438
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1439
  """Verifies the cluster config.
1440

1441
  """
1442
  REQ_BGL = True
1443

    
1444
  def _VerifyHVP(self, hvp_data):
1445
    """Verifies locally the syntax of the hypervisor parameters.
1446

1447
    """
1448
    for item, hv_name, hv_params in hvp_data:
1449
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1450
             (item, hv_name))
1451
      try:
1452
        hv_class = hypervisor.GetHypervisor(hv_name)
1453
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1454
        hv_class.CheckParameterSyntax(hv_params)
1455
      except errors.GenericError, err:
1456
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1457

    
1458
  def ExpandNames(self):
1459
    # Information can be safely retrieved as the BGL is acquired in exclusive
1460
    # mode
1461
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1462
    self.all_node_info = self.cfg.GetAllNodesInfo()
1463
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1464
    self.needed_locks = {}
1465

    
1466
  def Exec(self, feedback_fn):
1467
    """Verify integrity of cluster, performing various test on nodes.
1468

1469
    """
1470
    self.bad = False
1471
    self._feedback_fn = feedback_fn
1472

    
1473
    feedback_fn("* Verifying cluster config")
1474

    
1475
    for msg in self.cfg.VerifyConfig():
1476
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1477

    
1478
    feedback_fn("* Verifying cluster certificate files")
1479

    
1480
    for cert_filename in constants.ALL_CERT_FILES:
1481
      (errcode, msg) = _VerifyCertificate(cert_filename)
1482
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1483

    
1484
    feedback_fn("* Verifying hypervisor parameters")
1485

    
1486
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1487
                                                self.all_inst_info.values()))
1488

    
1489
    feedback_fn("* Verifying all nodes belong to an existing group")
1490

    
1491
    # We do this verification here because, should this bogus circumstance
1492
    # occur, it would never be caught by VerifyGroup, which only acts on
1493
    # nodes/instances reachable from existing node groups.
1494

    
1495
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1496
                         if node.group not in self.all_group_info)
1497

    
1498
    dangling_instances = {}
1499
    no_node_instances = []
1500

    
1501
    for inst in self.all_inst_info.values():
1502
      if inst.primary_node in dangling_nodes:
1503
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1504
      elif inst.primary_node not in self.all_node_info:
1505
        no_node_instances.append(inst.name)
1506

    
1507
    pretty_dangling = [
1508
        "%s (%s)" %
1509
        (node.name,
1510
         utils.CommaJoin(dangling_instances.get(node.name,
1511
                                                ["no instances"])))
1512
        for node in dangling_nodes]
1513

    
1514
    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1515
                  "the following nodes (and their instances) belong to a non"
1516
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1517

    
1518
    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1519
                  "the following instances have a non-existing primary-node:"
1520
                  " %s", utils.CommaJoin(no_node_instances))
1521

    
1522
    return (not self.bad, [g.name for g in self.all_group_info.values()])
1523

    
1524

    
1525
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1526
  """Verifies the status of a node group.
1527

1528
  """
1529
  HPATH = "cluster-verify"
1530
  HTYPE = constants.HTYPE_CLUSTER
1531
  REQ_BGL = False
1532

    
1533
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1534

    
1535
  class NodeImage(object):
1536
    """A class representing the logical and physical status of a node.
1537

1538
    @type name: string
1539
    @ivar name: the node name to which this object refers
1540
    @ivar volumes: a structure as returned from
1541
        L{ganeti.backend.GetVolumeList} (runtime)
1542
    @ivar instances: a list of running instances (runtime)
1543
    @ivar pinst: list of configured primary instances (config)
1544
    @ivar sinst: list of configured secondary instances (config)
1545
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1546
        instances for which this node is secondary (config)
1547
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1548
    @ivar dfree: free disk, as reported by the node (runtime)
1549
    @ivar offline: the offline status (config)
1550
    @type rpc_fail: boolean
1551
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1552
        not whether the individual keys were correct) (runtime)
1553
    @type lvm_fail: boolean
1554
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1555
    @type hyp_fail: boolean
1556
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1557
    @type ghost: boolean
1558
    @ivar ghost: whether this is a known node or not (config)
1559
    @type os_fail: boolean
1560
    @ivar os_fail: whether the RPC call didn't return valid OS data
1561
    @type oslist: list
1562
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1563
    @type vm_capable: boolean
1564
    @ivar vm_capable: whether the node can host instances
1565

1566
    """
1567
    def __init__(self, offline=False, name=None, vm_capable=True):
1568
      self.name = name
1569
      self.volumes = {}
1570
      self.instances = []
1571
      self.pinst = []
1572
      self.sinst = []
1573
      self.sbp = {}
1574
      self.mfree = 0
1575
      self.dfree = 0
1576
      self.offline = offline
1577
      self.vm_capable = vm_capable
1578
      self.rpc_fail = False
1579
      self.lvm_fail = False
1580
      self.hyp_fail = False
1581
      self.ghost = False
1582
      self.os_fail = False
1583
      self.oslist = {}
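    # Illustrative sketch (not part of the original source): Exec() below
    # builds roughly one NodeImage per node of the group from the
    # configuration, e.g.
    #   nimg = self.NodeImage(offline=False, name="node1", vm_capable=True)
    # and the _Update*/_Verify* helpers then fill in and consume the runtime
    # fields (volumes, instances, mfree, dfree) gathered over RPC.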
  def ExpandNames(self):
1586
    # This raises errors.OpPrereqError on its own:
1587
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1588

    
1589
    # Get instances in node group; this is unsafe and needs verification later
1590
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1591

    
1592
    self.needed_locks = {
1593
      locking.LEVEL_INSTANCE: inst_names,
1594
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1595
      locking.LEVEL_NODE: [],
1596
      }
1597

    
1598
    self.share_locks = _ShareAll()
1599

    
1600
  def DeclareLocks(self, level):
1601
    if level == locking.LEVEL_NODE:
1602
      # Get members of node group; this is unsafe and needs verification later
1603
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1604

    
1605
      all_inst_info = self.cfg.GetAllInstancesInfo()
1606

    
1607
      # In Exec(), we warn about mirrored instances that have primary and
1608
      # secondary living in separate node groups. To fully verify that
1609
      # volumes for these instances are healthy, we will need to do an
1610
      # extra call to their secondaries. We ensure here those nodes will
1611
      # be locked.
1612
      for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
1613
        # Important: access only the instances whose lock is owned
1614
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1615
          nodes.update(all_inst_info[inst].secondary_nodes)
1616

    
1617
      self.needed_locks[locking.LEVEL_NODE] = nodes
1618

    
1619
  def CheckPrereq(self):
1620
    group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1621
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1622

    
1623
    unlocked_nodes = \
1624
        group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1625

    
1626
    unlocked_instances = \
1627
        group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))
1628

    
1629
    if unlocked_nodes:
1630
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1631
                                 utils.CommaJoin(unlocked_nodes))
1632

    
1633
    if unlocked_instances:
1634
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1635
                                 utils.CommaJoin(unlocked_instances))
1636

    
1637
    self.all_node_info = self.cfg.GetAllNodesInfo()
1638
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1639

    
1640
    self.my_node_names = utils.NiceSort(group_nodes)
1641
    self.my_inst_names = utils.NiceSort(group_instances)
1642

    
1643
    self.my_node_info = dict((name, self.all_node_info[name])
1644
                             for name in self.my_node_names)
1645

    
1646
    self.my_inst_info = dict((name, self.all_inst_info[name])
1647
                             for name in self.my_inst_names)
1648

    
1649
    # We detect here the nodes that will need the extra RPC calls for verifying
1650
    # split LV volumes; they should be locked.
1651
    extra_lv_nodes = set()
1652

    
1653
    for inst in self.my_inst_info.values():
1654
      if inst.disk_template in constants.DTS_INT_MIRROR:
1655
        group = self.my_node_info[inst.primary_node].group
1656
        for nname in inst.secondary_nodes:
1657
          if self.all_node_info[nname].group != group:
1658
            extra_lv_nodes.add(nname)
1659

    
1660
    unlocked_lv_nodes = \
1661
        extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1662

    
1663
    if unlocked_lv_nodes:
1664
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1665
                                 utils.CommaJoin(unlocked_lv_nodes))
1666
    self.extra_lv_nodes = list(extra_lv_nodes)
1667

    
1668
  def _VerifyNode(self, ninfo, nresult):
1669
    """Perform some basic validation on data returned from a node.
1670

1671
      - check the result data structure is well formed and has all the
1672
        mandatory fields
1673
      - check ganeti version
1674

1675
    @type ninfo: L{objects.Node}
1676
    @param ninfo: the node to check
1677
    @param nresult: the results from the node
1678
    @rtype: boolean
1679
    @return: whether overall this call was successful (and we can expect
1680
         reasonable values in the response)
1681

1682
    """
1683
    node = ninfo.name
1684
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1685

    
1686
    # main result, nresult should be a non-empty dict
1687
    test = not nresult or not isinstance(nresult, dict)
1688
    _ErrorIf(test, self.ENODERPC, node,
1689
                  "unable to verify node: no data returned")
1690
    if test:
1691
      return False
1692

    
1693
    # compares ganeti version
1694
    local_version = constants.PROTOCOL_VERSION
1695
    remote_version = nresult.get("version", None)
1696
    test = not (remote_version and
1697
                isinstance(remote_version, (list, tuple)) and
1698
                len(remote_version) == 2)
1699
    _ErrorIf(test, self.ENODERPC, node,
1700
             "connection to node returned invalid data")
1701
    if test:
1702
      return False
1703

    
1704
    test = local_version != remote_version[0]
1705
    _ErrorIf(test, self.ENODEVERSION, node,
1706
             "incompatible protocol versions: master %s,"
1707
             " node %s", local_version, remote_version[0])
1708
    if test:
1709
      return False
1710

    
1711
    # node seems compatible, we can actually try to look into its results
1712

    
1713
    # full package version
1714
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1715
                  self.ENODEVERSION, node,
1716
                  "software version mismatch: master %s, node %s",
1717
                  constants.RELEASE_VERSION, remote_version[1],
1718
                  code=self.ETYPE_WARNING)
1719

    
1720
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1721
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1722
      for hv_name, hv_result in hyp_result.iteritems():
1723
        test = hv_result is not None
1724
        _ErrorIf(test, self.ENODEHV, node,
1725
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1726

    
1727
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1728
    if ninfo.vm_capable and isinstance(hvp_result, list):
1729
      for item, hv_name, hv_result in hvp_result:
1730
        _ErrorIf(True, self.ENODEHV, node,
1731
                 "hypervisor %s parameter verify failure (source %s): %s",
1732
                 hv_name, item, hv_result)
1733

    
1734
    test = nresult.get(constants.NV_NODESETUP,
1735
                       ["Missing NODESETUP results"])
1736
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1737
             "; ".join(test))
1738

    
1739
    return True
  def _VerifyNodeTime(self, ninfo, nresult,
1742
                      nvinfo_starttime, nvinfo_endtime):
1743
    """Check the node time.
1744

1745
    @type ninfo: L{objects.Node}
1746
    @param ninfo: the node to check
1747
    @param nresult: the remote results for the node
1748
    @param nvinfo_starttime: the start time of the RPC call
1749
    @param nvinfo_endtime: the end time of the RPC call
1750

1751
    """
1752
    node = ninfo.name
1753
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1754

    
1755
    ntime = nresult.get(constants.NV_TIME, None)
1756
    try:
1757
      ntime_merged = utils.MergeTime(ntime)
1758
    except (ValueError, TypeError):
1759
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1760
      return
1761

    
1762
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1763
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1764
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1765
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1766
    else:
1767
      ntime_diff = None
1768

    
1769
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1770
             "Node time diverges by at least %s from master node time",
1771
             ntime_diff)
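    # Worked example (illustrative, not from the original code): assuming
    # constants.NODE_MAX_CLOCK_SKEW is 150 seconds, a node whose merged time
    # is 12:10:00 while the RPC window on the master was 12:05:00-12:05:05
    # exceeds the upper bound (12:05:05 + 150s), so ntime_diff becomes
    # "295.0s" and the ENODETIME error above is reported.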
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1774
    """Check the node LVM results.
1775

1776
    @type ninfo: L{objects.Node}
1777
    @param ninfo: the node to check
1778
    @param nresult: the remote results for the node
1779
    @param vg_name: the configured VG name
1780

1781
    """
1782
    if vg_name is None:
1783
      return
1784

    
1785
    node = ninfo.name
1786
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1787

    
1788
    # checks vg existence and size > 20G
1789
    vglist = nresult.get(constants.NV_VGLIST, None)
1790
    test = not vglist
1791
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1792
    if not test:
1793
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1794
                                            constants.MIN_VG_SIZE)
1795
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1796

    
1797
    # check pv names
1798
    pvlist = nresult.get(constants.NV_PVLIST, None)
1799
    test = pvlist is None
1800
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1801
    if not test:
1802
      # check that ':' is not present in PV names, since it's a
1803
      # special character for lvcreate (denotes the range of PEs to
1804
      # use on the PV)
1805
      for _, pvname, owner_vg in pvlist:
1806
        test = ":" in pvname
1807
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1808
                 " '%s' of VG '%s'", pvname, owner_vg)
1809

    
1810
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1811
    """Check the node bridges.
1812

1813
    @type ninfo: L{objects.Node}
1814
    @param ninfo: the node to check
1815
    @param nresult: the remote results for the node
1816
    @param bridges: the expected list of bridges
1817

1818
    """
1819
    if not bridges:
1820
      return
1821

    
1822
    node = ninfo.name
1823
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1824

    
1825
    missing = nresult.get(constants.NV_BRIDGES, None)
1826
    test = not isinstance(missing, list)
1827
    _ErrorIf(test, self.ENODENET, node,
1828
             "did not return valid bridge information")
1829
    if not test:
1830
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1831
               utils.CommaJoin(sorted(missing)))
1832

    
1833
  def _VerifyNodeNetwork(self, ninfo, nresult):
1834
    """Check the node network connectivity results.
1835

1836
    @type ninfo: L{objects.Node}
1837
    @param ninfo: the node to check
1838
    @param nresult: the remote results for the node
1839

1840
    """
1841
    node = ninfo.name
1842
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1843

    
1844
    test = constants.NV_NODELIST not in nresult
1845
    _ErrorIf(test, self.ENODESSH, node,
1846
             "node hasn't returned node ssh connectivity data")
1847
    if not test:
1848
      if nresult[constants.NV_NODELIST]:
1849
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1850
          _ErrorIf(True, self.ENODESSH, node,
1851
                   "ssh communication with node '%s': %s", a_node, a_msg)
1852

    
1853
    test = constants.NV_NODENETTEST not in nresult
1854
    _ErrorIf(test, self.ENODENET, node,
1855
             "node hasn't returned node tcp connectivity data")
1856
    if not test:
1857
      if nresult[constants.NV_NODENETTEST]:
1858
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1859
        for anode in nlist:
1860
          _ErrorIf(True, self.ENODENET, node,
1861
                   "tcp communication with node '%s': %s",
1862
                   anode, nresult[constants.NV_NODENETTEST][anode])
1863

    
1864
    test = constants.NV_MASTERIP not in nresult
1865
    _ErrorIf(test, self.ENODENET, node,
1866
             "node hasn't returned node master IP reachability data")
1867
    if not test:
1868
      if not nresult[constants.NV_MASTERIP]:
1869
        if node == self.master_node:
1870
          msg = "the master node cannot reach the master IP (not configured?)"
1871
        else:
1872
          msg = "cannot reach the master IP"
1873
        _ErrorIf(True, self.ENODENET, node, msg)
1874

    
1875
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1876
                      diskstatus):
1877
    """Verify an instance.
1878

1879
    This function checks to see if the required block devices are
1880
    available on the instance's node.
1881

1882
    """
1883
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1884
    node_current = instanceconfig.primary_node
1885

    
1886
    node_vol_should = {}
1887
    instanceconfig.MapLVsByNode(node_vol_should)
1888

    
1889
    for node in node_vol_should:
1890
      n_img = node_image[node]
1891
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1892
        # ignore missing volumes on offline or broken nodes
1893
        continue
1894
      for volume in node_vol_should[node]:
1895
        test = volume not in n_img.volumes
1896
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1897
                 "volume %s missing on node %s", volume, node)
1898

    
1899
    if instanceconfig.admin_up:
1900
      pri_img = node_image[node_current]
1901
      test = instance not in pri_img.instances and not pri_img.offline
1902
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1903
               "instance not running on its primary node %s",
1904
               node_current)
1905

    
1906
    diskdata = [(nname, success, status, idx)
1907
                for (nname, disks) in diskstatus.items()
1908
                for idx, (success, status) in enumerate(disks)]
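    # Illustrative example (not part of the original code): a diskstatus of
    #   {"node1": [(True, st0), (False, "err")]}
    # is flattened into
    #   [("node1", True, st0, 0), ("node1", False, "err", 1)]
    # i.e. one (node, success, status, disk_index) tuple per disk.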
    for nname, success, bdev_status, idx in diskdata:
1911
      # the 'ghost node' construction in Exec() ensures that we have a
1912
      # node here
1913
      snode = node_image[nname]
1914
      bad_snode = snode.ghost or snode.offline
1915
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1916
               self.EINSTANCEFAULTYDISK, instance,
1917
               "couldn't retrieve status for disk/%s on %s: %s",
1918
               idx, nname, bdev_status)
1919
      _ErrorIf((instanceconfig.admin_up and success and
1920
                bdev_status.ldisk_status == constants.LDS_FAULTY),
1921
               self.EINSTANCEFAULTYDISK, instance,
1922
               "disk/%s on %s is faulty", idx, nname)
1923

    
1924
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1925
    """Verify if there are any unknown volumes in the cluster.
1926

1927
    The .os, .swap and backup volumes are ignored. All other volumes are
1928
    reported as unknown.
1929

1930
    @type reserved: L{ganeti.utils.FieldSet}
1931
    @param reserved: a FieldSet of reserved volume names
1932

1933
    """
1934
    for node, n_img in node_image.items():
1935
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1936
        # skip non-healthy nodes
1937
        continue
1938
      for volume in n_img.volumes:
1939
        test = ((node not in node_vol_should or
1940
                volume not in node_vol_should[node]) and
1941
                not reserved.Matches(volume))
1942
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1943
                      "volume %s is unknown", volume)
1944

    
1945
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1946
    """Verify N+1 Memory Resilience.
1947

1948
    Check that if one single node dies we can still start all the
1949
    instances it was primary for.
1950

1951
    """
1952
    cluster_info = self.cfg.GetClusterInfo()
1953
    for node, n_img in node_image.items():
1954
      # This code checks that every node which is now listed as
1955
      # secondary has enough memory to host all instances it is
1956
      # supposed to should a single other node in the cluster fail.
1957
      # FIXME: not ready for failover to an arbitrary node
1958
      # FIXME: does not support file-backed instances
1959
      # WARNING: we currently take into account down instances as well
1960
      # as up ones, considering that even if they're down someone
1961
      # might want to start them even in the event of a node failure.
1962
      if n_img.offline:
1963
        # we're skipping offline nodes from the N+1 warning, since
1964
        # most likely we don't have good memory information from them;
1965
        # we already list instances living on such nodes, and that's
1966
        # enough warning
1967
        continue
1968
      for prinode, instances in n_img.sbp.items():
1969
        needed_mem = 0
1970
        for instance in instances:
1971
          bep = cluster_info.FillBE(instance_cfg[instance])
1972
          if bep[constants.BE_AUTO_BALANCE]:
1973
            needed_mem += bep[constants.BE_MEMORY]
1974
        test = n_img.mfree < needed_mem
1975
        self._ErrorIf(test, self.ENODEN1, node,
1976
                      "not enough memory to accomodate instance failovers"
1977
                      " should node %s fail (%dMiB needed, %dMiB available)",
1978
                      prinode, needed_mem, n_img.mfree)
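    # Worked example (illustrative, not from the original source): if this
    # node is secondary for two auto-balanced instances whose primary is
    # node A, needing 1024 and 2048 MiB respectively, then losing A requires
    # 3072 MiB free here; with n_img.mfree == 2048 the ENODEN1 error above
    # is reported.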
  @classmethod
1981
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1982
                   (files_all, files_all_opt, files_mc, files_vm)):
1983
    """Verifies file checksums collected from all nodes.
1984

1985
    @param errorif: Callback for reporting errors
1986
    @param nodeinfo: List of L{objects.Node} objects
1987
    @param master_node: Name of master node
1988
    @param all_nvinfo: RPC results
1989

1990
    """
1991
    node_names = frozenset(node.name for node in nodeinfo if not node.offline)
1992

    
1993
    assert master_node in node_names
1994
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1995
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1996
           "Found file listed in more than one file list"
1997

    
1998
    # Define functions determining which nodes to consider for a file
1999
    file2nodefn = dict([(filename, fn)
2000
      for (files, fn) in [(files_all, None),
2001
                          (files_all_opt, None),
2002
                          (files_mc, lambda node: (node.master_candidate or
2003
                                                   node.name == master_node)),
2004
                          (files_vm, lambda node: node.vm_capable)]
2005
      for filename in files])
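    # Illustrative note (not in the original source): a filename taken from
    # files_all or files_all_opt maps to None ("consider every node"), while
    # one from files_mc maps to a predicate that only accepts master
    # candidates or the master node, and one from files_vm only accepts
    # vm_capable nodes.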
    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
2008

    
2009
    for node in nodeinfo:
2010
      if node.offline:
2011
        continue
2012

    
2013
      nresult = all_nvinfo[node.name]
2014

    
2015
      if nresult.fail_msg or not nresult.payload:
2016
        node_files = None
2017
      else:
2018
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2019

    
2020
      test = not (node_files and isinstance(node_files, dict))
2021
      errorif(test, cls.ENODEFILECHECK, node.name,
2022
              "Node did not return file checksum data")
2023
      if test:
2024
        continue
2025

    
2026
      for (filename, checksum) in node_files.items():
2027
        # Check if the file should be considered for a node
2028
        fn = file2nodefn[filename]
2029
        if fn is None or fn(node):
2030
          fileinfo[filename].setdefault(checksum, set()).add(node.name)
2031

    
2032
    for (filename, checksums) in fileinfo.items():
2033
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2034

    
2035
      # Nodes having the file
2036
      with_file = frozenset(node_name
2037
                            for nodes in fileinfo[filename].values()
2038
                            for node_name in nodes)
2039

    
2040
      # Nodes missing file
2041
      missing_file = node_names - with_file
2042

    
2043
      if filename in files_all_opt:
2044
        # All or no nodes
2045
        errorif(missing_file and missing_file != node_names,
2046
                cls.ECLUSTERFILECHECK, None,
2047
                "File %s is optional, but it must exist on all or no"
2048
                " nodes (not found on %s)",
2049
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2050
      else:
2051
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2052
                "File %s is missing from node(s) %s", filename,
2053
                utils.CommaJoin(utils.NiceSort(missing_file)))
2054

    
2055
      # See if there are multiple versions of the file
2056
      test = len(checksums) > 1
2057
      if test:
2058
        variants = ["variant %s on %s" %
2059
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2060
                    for (idx, (checksum, nodes)) in
2061
                      enumerate(sorted(checksums.items()))]
2062
      else:
2063
        variants = []
2064

    
2065
      errorif(test, cls.ECLUSTERFILECHECK, None,
2066
              "File %s found with %s different checksums (%s)",
2067
              filename, len(checksums), "; ".join(variants))
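      # Illustrative example (not part of the original code): checksums of
      #   {"abc...": set(["n1", "n2"]), "def...": set(["n3"])}
      # produce variants == ["variant 1 on n1, n2", "variant 2 on n3"] and
      # the ECLUSTERFILECHECK error above reports both.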
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2070
                      drbd_map):
2071
    """Verifies and the node DRBD status.
2072

2073
    @type ninfo: L{objects.Node}
2074
    @param ninfo: the node to check
2075
    @param nresult: the remote results for the node
2076
    @param instanceinfo: the dict of instances
2077
    @param drbd_helper: the configured DRBD usermode helper
2078
    @param drbd_map: the DRBD map as returned by
2079
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2080

2081
    """
2082
    node = ninfo.name
2083
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2084

    
2085
    if drbd_helper:
2086
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2087
      test = (helper_result is None)
2088
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
2089
               "no drbd usermode helper returned")
2090
      if helper_result:
2091
        status, payload = helper_result
2092
        test = not status
2093
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2094
                 "drbd usermode helper check unsuccessful: %s", payload)
2095
        test = status and (payload != drbd_helper)
2096
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2097
                 "wrong drbd usermode helper: %s", payload)
2098

    
2099
    # compute the DRBD minors
2100
    node_drbd = {}
2101
    for minor, instance in drbd_map[node].items():
2102
      test = instance not in instanceinfo
2103
      _ErrorIf(test, self.ECLUSTERCFG, None,
2104
               "ghost instance '%s' in temporary DRBD map", instance)
2105
        # ghost instance should not be running, but otherwise we
2106
        # don't give double warnings (both ghost instance and
2107
        # unallocated minor in use)
2108
      if test:
2109
        node_drbd[minor] = (instance, False)
2110
      else:
2111
        instance = instanceinfo[instance]
2112
        node_drbd[minor] = (instance.name, instance.admin_up)
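    # Illustrative example (not part of the original code): a drbd_map entry
    # of {0: "inst1", 1: "ghost"} where "ghost" is not in instanceinfo yields
    #   node_drbd == {0: ("inst1", True), 1: ("ghost", False)}
    # assuming inst1 is configured and marked admin_up.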
    # and now check them
2115
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2116
    test = not isinstance(used_minors, (tuple, list))
2117
    _ErrorIf(test, self.ENODEDRBD, node,
2118
             "cannot parse drbd status file: %s", str(used_minors))
2119
    if test:
2120
      # we cannot check drbd status
2121
      return
2122

    
2123
    for minor, (iname, must_exist) in node_drbd.items():
2124
      test = minor not in used_minors and must_exist
2125
      _ErrorIf(test, self.ENODEDRBD, node,
2126
               "drbd minor %d of instance %s is not active", minor, iname)
2127
    for minor in used_minors:
2128
      test = minor not in node_drbd
2129
      _ErrorIf(test, self.ENODEDRBD, node,
2130
               "unallocated drbd minor %d is in use", minor)
2131

    
2132
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2133
    """Builds the node OS structures.
2134

2135
    @type ninfo: L{objects.Node}
2136
    @param ninfo: the node to check
2137
    @param nresult: the remote results for the node
2138
    @param nimg: the node image object
2139

2140
    """
2141
    node = ninfo.name
2142
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2143

    
2144
    remote_os = nresult.get(constants.NV_OSLIST, None)
2145
    test = (not isinstance(remote_os, list) or
2146
            not compat.all(isinstance(v, list) and len(v) == 7
2147
                           for v in remote_os))
2148

    
2149
    _ErrorIf(test, self.ENODEOS, node,
2150
             "node hasn't returned valid OS data")
2151

    
2152
    nimg.os_fail = test
2153

    
2154
    if test:
2155
      return
2156

    
2157
    os_dict = {}
2158

    
2159
    for (name, os_path, status, diagnose,
2160
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2161

    
2162
      if name not in os_dict:
2163
        os_dict[name] = []
2164

    
2165
      # parameters is a list of lists instead of list of tuples due to
2166
      # JSON lacking a real tuple type, fix it:
2167
      parameters = [tuple(v) for v in parameters]
2168
      os_dict[name].append((os_path, status, diagnose,
2169
                            set(variants), set(parameters), set(api_ver)))
2170

    
2171
    nimg.oslist = os_dict
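    # Illustrative shape (not from the original source): nimg.oslist ends up
    # looking like
    #   {"debootstrap": [(path, status, diagnose_msg,
    #                     set(variants), set(parameters), set(api_versions))]}
    # with one tuple per occurrence of the OS reported by the node.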
  def _VerifyNodeOS(self, ninfo, nimg, base):
2174
    """Verifies the node OS list.
2175

2176
    @type ninfo: L{objects.Node}
2177
    @param ninfo: the node to check
2178
    @param nimg: the node image object
2179
    @param base: the 'template' node we match against (e.g. from the master)
2180

2181
    """
2182
    node = ninfo.name
2183
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2184

    
2185
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2186

    
2187
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2188
    for os_name, os_data in nimg.oslist.items():
2189
      assert os_data, "Empty OS status for OS %s?!" % os_name
2190
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2191
      _ErrorIf(not f_status, self.ENODEOS, node,
2192
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2193
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2194
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2195
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2196
      # comparisons with the 'base' image
2197
      test = os_name not in base.oslist
2198
      _ErrorIf(test, self.ENODEOS, node,
2199
               "Extra OS %s not present on reference node (%s)",
2200
               os_name, base.name)
2201
      if test:
2202
        continue
2203
      assert base.oslist[os_name], "Base node has empty OS status?"
2204
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2205
      if not b_status:
2206
        # base OS is invalid, skipping
2207
        continue
2208
      for kind, a, b in [("API version", f_api, b_api),
2209
                         ("variants list", f_var, b_var),
2210
                         ("parameters", beautify_params(f_param),
2211
                          beautify_params(b_param))]:
2212
        _ErrorIf(a != b, self.ENODEOS, node,
2213
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2214
                 kind, os_name, base.name,
2215
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2216

    
2217
    # check any missing OSes
2218
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2219
    _ErrorIf(missing, self.ENODEOS, node,
2220
             "OSes present on reference node %s but missing on this node: %s",
2221
             base.name, utils.CommaJoin(missing))
2222

    
2223
  def _VerifyOob(self, ninfo, nresult):
2224
    """Verifies out of band functionality of a node.
2225

2226
    @type ninfo: L{objects.Node}
2227
    @param ninfo: the node to check
2228
    @param nresult: the remote results for the node
2229

2230
    """
2231
    node = ninfo.name
2232
    # We just have to verify the paths on master and/or master candidates
2233
    # as the oob helper is invoked on the master
2234
    if ((ninfo.master_candidate or ninfo.master_capable) and
2235
        constants.NV_OOB_PATHS in nresult):
2236
      for path_result in nresult[constants.NV_OOB_PATHS]:
2237
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2238

    
2239
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2240
    """Verifies and updates the node volume data.
2241

2242
    This function will update a L{NodeImage}'s internal structures
2243
    with data from the remote call.
2244

2245
    @type ninfo: L{objects.Node}
2246
    @param ninfo: the node to check
2247
    @param nresult: the remote results for the node
2248
    @param nimg: the node image object
2249
    @param vg_name: the configured VG name
2250

2251
    """
2252
    node = ninfo.name
2253
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2254

    
2255
    nimg.lvm_fail = True
2256
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2257
    if vg_name is None:
2258
      pass
2259
    elif isinstance(lvdata, basestring):
2260
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2261
               utils.SafeEncode(lvdata))
2262
    elif not isinstance(lvdata, dict):
2263
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2264
    else:
2265
      nimg.volumes = lvdata
2266
      nimg.lvm_fail = False
2267

    
2268
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2269
    """Verifies and updates the node instance list.
2270

2271
    If the listing was successful, then updates this node's instance
2272
    list. Otherwise, it marks the RPC call as failed for the instance
2273
    list key.
2274

2275
    @type ninfo: L{objects.Node}
2276
    @param ninfo: the node to check
2277
    @param nresult: the remote results for the node
2278
    @param nimg: the node image object
2279

2280
    """
2281
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2282
    test = not isinstance(idata, list)
2283
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2284
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2285
    if test:
2286
      nimg.hyp_fail = True
2287
    else:
2288
      nimg.instances = idata
2289

    
2290
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2291
    """Verifies and computes a node information map
2292

2293
    @type ninfo: L{objects.Node}
2294
    @param ninfo: the node to check
2295
    @param nresult: the remote results for the node
2296
    @param nimg: the node image object
2297
    @param vg_name: the configured VG name
2298

2299
    """
2300
    node = ninfo.name
2301
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2302

    
2303
    # try to read free memory (from the hypervisor)
2304
    hv_info = nresult.get(constants.NV_HVINFO, None)
2305
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2306
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2307
    if not test:
2308
      try:
2309
        nimg.mfree = int(hv_info["memory_free"])
2310
      except (ValueError, TypeError):
2311
        _ErrorIf(True, self.ENODERPC, node,
2312
                 "node returned invalid nodeinfo, check hypervisor")
2313

    
2314
    # FIXME: devise a free space model for file based instances as well
2315
    if vg_name is not None:
2316
      test = (constants.NV_VGLIST not in nresult or
2317
              vg_name not in nresult[constants.NV_VGLIST])
2318
      _ErrorIf(test, self.ENODELVM, node,
2319
               "node didn't return data for the volume group '%s'"
2320
               " - it is either missing or broken", vg_name)
2321
      if not test:
2322
        try:
2323
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2324
        except (ValueError, TypeError):
2325
          _ErrorIf(True, self.ENODERPC, node,
2326
                   "node returned invalid LVM info, check LVM status")
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2329
    """Gets per-disk status information for all instances.
2330

2331
    @type nodelist: list of strings
2332
    @param nodelist: Node names
2333
    @type node_image: dict of (name, L{objects.Node})
2334
    @param node_image: Node objects
2335
    @type instanceinfo: dict of (name, L{objects.Instance})
2336
    @param instanceinfo: Instance objects
2337
    @rtype: {instance: {node: [(success, payload)]}}
2338
    @return: a dictionary of per-instance dictionaries with nodes as
2339
        keys and disk information as values; the disk information is a
2340
        list of tuples (success, payload)
2341

2342
    """
2343
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2344

    
2345
    node_disks = {}
2346
    node_disks_devonly = {}
2347
    diskless_instances = set()
2348
    diskless = constants.DT_DISKLESS
2349

    
2350
    for nname in nodelist:
2351
      node_instances = list(itertools.chain(node_image[nname].pinst,
2352
                                            node_image[nname].sinst))
2353
      diskless_instances.update(inst for inst in node_instances
2354
                                if instanceinfo[inst].disk_template == diskless)
2355
      disks = [(inst, disk)
2356
               for inst in node_instances
2357
               for disk in instanceinfo[inst].disks]
2358

    
2359
      if not disks:
2360
        # No need to collect data
2361
        continue
2362

    
2363
      node_disks[nname] = disks
2364

    
2365
      # Creating copies as SetDiskID below will modify the objects and that can
2366
      # lead to incorrect data returned from nodes
2367
      devonly = [dev.Copy() for (_, dev) in disks]
2368

    
2369
      for dev in devonly:
2370
        self.cfg.SetDiskID(dev, nname)
2371

    
2372
      node_disks_devonly[nname] = devonly
2373

    
2374
    assert len(node_disks) == len(node_disks_devonly)
2375

    
2376
    # Collect data from all nodes with disks
2377
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2378
                                                          node_disks_devonly)
2379

    
2380
    assert len(result) == len(node_disks)
2381

    
2382
    instdisk = {}
2383

    
2384
    for (nname, nres) in result.items():
2385
      disks = node_disks[nname]
2386

    
2387
      if nres.offline:
2388
        # No data from this node
2389
        data = len(disks) * [(False, "node offline")]
2390
      else:
2391
        msg = nres.fail_msg
2392
        _ErrorIf(msg, self.ENODERPC, nname,
2393
                 "while getting disk information: %s", msg)
2394
        if msg:
2395
          # No data from this node
2396
          data = len(disks) * [(False, msg)]
2397
        else:
2398
          data = []
2399
          for idx, i in enumerate(nres.payload):
2400
            if isinstance(i, (tuple, list)) and len(i) == 2:
2401
              data.append(i)
2402
            else:
2403
              logging.warning("Invalid result from node %s, entry %d: %s",
2404
                              nname, idx, i)
2405
              data.append((False, "Invalid result from the remote node"))
2406

    
2407
      for ((inst, _), status) in zip(disks, data):
2408
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
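    # Illustrative example (not part of the original code): for an instance
    # "inst1" with one DRBD disk on nodes "n1" and "n2", instdisk ends up as
    #   {"inst1": {"n1": [(True, status)], "n2": [(True, status)]}}
    # while diskless instances get an empty inner dictionary just below.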
    # Add empty entries for diskless instances.
2411
    for inst in diskless_instances:
2412
      assert inst not in instdisk
2413
      instdisk[inst] = {}
2414

    
2415
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2416
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2417
                      compat.all(isinstance(s, (tuple, list)) and
2418
                                 len(s) == 2 for s in statuses)
2419
                      for inst, nnames in instdisk.items()
2420
                      for nname, statuses in nnames.items())
2421
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2422

    
2423
    return instdisk
  def BuildHooksEnv(self):
2426
    """Build hooks env.
2427

2428
    Cluster-Verify hooks just ran in the post phase and their failure makes
2429
    the output be logged in the verify output and the verification to fail.
2430

2431
    """
2432
    env = {
2433
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2434
      }
2435

    
2436
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2437
               for node in self.my_node_info.values())
2438

    
2439
    return env
2440

    
2441
  def BuildHooksNodes(self):
2442
    """Build hooks nodes.
2443

2444
    """
2445
    return ([], self.my_node_names)
2446

    
2447
  def Exec(self, feedback_fn):
2448
    """Verify integrity of the node group, performing various test on nodes.
2449

2450
    """
2451
    # This method has too many local variables. pylint: disable-msg=R0914
2452

    
2453
    if not self.my_node_names:
2454
      # empty node group
2455
      feedback_fn("* Empty node group, skipping verification")
2456
      return True
2457

    
2458
    self.bad = False
2459
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2460
    verbose = self.op.verbose
2461
    self._feedback_fn = feedback_fn
2462

    
2463
    vg_name = self.cfg.GetVGName()
2464
    drbd_helper = self.cfg.GetDRBDHelper()
2465
    cluster = self.cfg.GetClusterInfo()
2466
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2467
    hypervisors = cluster.enabled_hypervisors
2468
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2469

    
2470
    i_non_redundant = [] # Non redundant instances
2471
    i_non_a_balanced = [] # Non auto-balanced instances
2472
    n_offline = 0 # Count of offline nodes
2473
    n_drained = 0 # Count of nodes being drained
2474
    node_vol_should = {}
2475

    
2476
    # FIXME: verify OS list
2477

    
2478
    # File verification
2479
    filemap = _ComputeAncillaryFiles(cluster, False)
2480

    
2481
    # do local checksums
2482
    master_node = self.master_node = self.cfg.GetMasterNode()
2483
    master_ip = self.cfg.GetMasterIP()
2484

    
2485
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2486

    
2487
    # We will make nodes contact all nodes in their group, and one node from
2488
    # every other group.
2489
    # TODO: should it be a *random* node, different every time?
2490
    online_nodes = [node.name for node in node_data_list if not node.offline]
2491
    other_group_nodes = {}
2492

    
2493
    for name in sorted(self.all_node_info):
2494
      node = self.all_node_info[name]
2495
      if (node.group not in other_group_nodes
2496
          and node.group != self.group_uuid
2497
          and not node.offline):
2498
        other_group_nodes[node.group] = node.name
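    # Illustrative result (not from the original source): with this group
    # plus two other groups g2 and g3 in the cluster, other_group_nodes may
    # end up as {"g2-uuid": "nodeX", "g3-uuid": "nodeY"}, i.e. one online
    # contact node per foreign group for the connectivity tests below.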
    node_verify_param = {
2501
      constants.NV_FILELIST:
2502
        utils.UniqueSequence(filename
2503
                             for files in filemap
2504
                             for filename in files),
2505
      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2506
      constants.NV_HYPERVISOR: hypervisors,
2507
      constants.NV_HVPARAMS:
2508
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2509
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2510
                                 for node in node_data_list
2511
                                 if not node.offline],
2512
      constants.NV_INSTANCELIST: hypervisors,
2513
      constants.NV_VERSION: None,
2514
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2515
      constants.NV_NODESETUP: None,
2516
      constants.NV_TIME: None,
2517
      constants.NV_MASTERIP: (master_node, master_ip),
2518
      constants.NV_OSLIST: None,
2519
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2520
      }
2521

    
2522
    if vg_name is not None:
2523
      node_verify_param[constants.NV_VGLIST] = None
2524
      node_verify_param[constants.NV_LVLIST] = vg_name
2525
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2526
      node_verify_param[constants.NV_DRBDLIST] = None
2527

    
2528
    if drbd_helper:
2529
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2530

    
2531
    # bridge checks
2532
    # FIXME: this needs to be changed per node-group, not cluster-wide
2533
    bridges = set()
2534
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2535
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2536
      bridges.add(default_nicpp[constants.NIC_LINK])
2537
    for instance in self.my_inst_info.values():
2538
      for nic in instance.nics:
2539
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2540
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2541
          bridges.add(full_nic[constants.NIC_LINK])
2542

    
2543
    if bridges:
2544
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2545

    
2546
    # Build our expected cluster state
2547
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2548
                                                 name=node.name,
2549
                                                 vm_capable=node.vm_capable))
2550
                      for node in node_data_list)
2551

    
2552
    # Gather OOB paths
2553
    oob_paths = []
2554
    for node in self.all_node_info.values():
2555
      path = _SupportsOob(self.cfg, node)
2556
      if path and path not in oob_paths:
2557
        oob_paths.append(path)
2558

    
2559
    if oob_paths:
2560
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2561

    
2562
    for instance in self.my_inst_names:
2563
      inst_config = self.my_inst_info[instance]
2564

    
2565
      for nname in inst_config.all_nodes:
2566
        if nname not in node_image:
2567
          gnode = self.NodeImage(name=nname)
2568
          gnode.ghost = (nname not in self.all_node_info)
2569
          node_image[nname] = gnode
2570

    
2571
      inst_config.MapLVsByNode(node_vol_should)
2572

    
2573
      pnode = inst_config.primary_node
2574
      node_image[pnode].pinst.append(instance)
2575

    
2576
      for snode in inst_config.secondary_nodes:
2577
        nimg = node_image[snode]
2578
        nimg.sinst.append(instance)
2579
        if pnode not in nimg.sbp:
2580
          nimg.sbp[pnode] = []
2581
        nimg.sbp[pnode].append(instance)
2582

    
2583
    # At this point, we have the in-memory data structures complete,
2584
    # except for the runtime information, which we'll gather next
2585

    
2586
    # Due to the way our RPC system works, exact response times cannot be
2587
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2588
    # time before and after executing the request, we can at least have a time
2589
    # window.
2590
    nvinfo_starttime = time.time()
2591
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2592
                                           node_verify_param,
2593
                                           self.cfg.GetClusterName())
2594
    nvinfo_endtime = time.time()
2595

    
2596
    if self.extra_lv_nodes and vg_name is not None:
2597
      extra_lv_nvinfo = \
2598
          self.rpc.call_node_verify(self.extra_lv_nodes,
2599
                                    {constants.NV_LVLIST: vg_name},
2600
                                    self.cfg.GetClusterName())
2601
    else:
2602
      extra_lv_nvinfo = {}
2603

    
2604
    all_drbd_map = self.cfg.ComputeDRBDMap()
2605

    
2606
    feedback_fn("* Gathering disk information (%s nodes)" %
2607
                len(self.my_node_names))
2608
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2609
                                     self.my_inst_info)
2610

    
2611
    feedback_fn("* Verifying configuration file consistency")
2612

    
2613
    # If not all nodes are being checked, we need to make sure the master node
2614
    # and a non-checked vm_capable node are in the list.
2615
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2616
    if absent_nodes:
2617
      vf_nvinfo = all_nvinfo.copy()
2618
      vf_node_info = list(self.my_node_info.values())
2619
      additional_nodes = []
2620
      if master_node not in self.my_node_info:
2621
        additional_nodes.append(master_node)
2622
        vf_node_info.append(self.all_node_info[master_node])
2623
      # Add the first vm_capable node we find which is not included
2624
      for node in absent_nodes:
2625
        nodeinfo = self.all_node_info[node]
2626
        if nodeinfo.vm_capable and not nodeinfo.offline:
2627
          additional_nodes.append(node)
2628
          vf_node_info.append(self.all_node_info[node])
2629
          break
2630
      key = constants.NV_FILELIST
2631
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2632
                                                 {key: node_verify_param[key]},
2633
                                                 self.cfg.GetClusterName()))
2634
    else:
2635
      vf_nvinfo = all_nvinfo
2636
      vf_node_info = self.my_node_info.values()
2637

    
2638
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2639

    
2640
    feedback_fn("* Verifying node status")
2641

    
2642
    refos_img = None
2643

    
2644
    for node_i in node_data_list:
2645
      node = node_i.name
2646
      nimg = node_image[node]
2647

    
2648
      if node_i.offline:
2649
        if verbose:
2650
          feedback_fn("* Skipping offline node %s" % (node,))
2651
        n_offline += 1
2652
        continue
2653

    
2654
      if node == master_node:
2655
        ntype = "master"
2656
      elif node_i.master_candidate:
2657
        ntype = "master candidate"
2658
      elif node_i.drained:
2659
        ntype = "drained"
2660
        n_drained += 1
2661
      else:
2662
        ntype = "regular"
2663
      if verbose:
2664
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2665

    
2666
      msg = all_nvinfo[node].fail_msg
2667
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2668
      if msg:
2669
        nimg.rpc_fail = True
2670
        continue
2671

    
2672
      nresult = all_nvinfo[node].payload
2673

    
2674
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2675
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2676
      self._VerifyNodeNetwork(node_i, nresult)
2677
      self._VerifyOob(node_i, nresult)
2678

    
2679
      if nimg.vm_capable:
2680
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2681
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2682
                             all_drbd_map)
2683

    
2684
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2685
        self._UpdateNodeInstances(node_i, nresult, nimg)
2686
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2687
        self._UpdateNodeOS(node_i, nresult, nimg)
2688

    
2689
        if not nimg.os_fail:
2690
          if refos_img is None:
2691
            refos_img = nimg
2692
          self._VerifyNodeOS(node_i, nimg, refos_img)
2693
        self._VerifyNodeBridges(node_i, nresult, bridges)
2694

    
2695
        # Check whether all running instances are primary for the node. (This
2696
        # can no longer be done from _VerifyInstance below, since some of the
2697
        # wrong instances could be from other node groups.)
2698
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2699

    
2700
        for inst in non_primary_inst:
2701
          test = inst in self.all_inst_info
2702
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2703
                   "instance should not run on node %s", node_i.name)
2704
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2705
                   "node is running unknown instance %s", inst)
2706

    
2707
    for node, result in extra_lv_nvinfo.items():
2708
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2709
                              node_image[node], vg_name)
2710

    
2711
    feedback_fn("* Verifying instance status")
2712
    for instance in self.my_inst_names:
2713
      if verbose:
2714
        feedback_fn("* Verifying instance %s" % instance)
2715
      inst_config = self.my_inst_info[instance]
2716
      self._VerifyInstance(instance, inst_config, node_image,
2717
                           instdisk[instance])
2718
      inst_nodes_offline = []
2719

    
2720
      pnode = inst_config.primary_node
2721
      pnode_img = node_image[pnode]
2722
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2723
               self.ENODERPC, pnode, "instance %s, connection to"
2724
               " primary node failed", instance)
2725

    
2726
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2727
               self.EINSTANCEBADNODE, instance,
2728
               "instance is marked as running and lives on offline node %s",
2729
               inst_config.primary_node)
2730

    
2731
      # If the instance is non-redundant we cannot survive losing its primary
2732
      # node, so we are not N+1 compliant. On the other hand we have no disk
2733
      # templates with more than one secondary so that situation is not well
2734
      # supported either.
2735
      # FIXME: does not support file-backed instances
2736
      if not inst_config.secondary_nodes:
2737
        i_non_redundant.append(instance)
2738

    
2739
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2740
               instance, "instance has multiple secondary nodes: %s",
2741
               utils.CommaJoin(inst_config.secondary_nodes),
2742
               code=self.ETYPE_WARNING)
2743

    
2744
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2745
        pnode = inst_config.primary_node
2746
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2747
        instance_groups = {}
2748

    
2749
        for node in instance_nodes:
2750
          instance_groups.setdefault(self.all_node_info[node].group,
2751
                                     []).append(node)
2752

    
2753
        pretty_list = [
2754
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2755
          # Sort so that we always list the primary node first.
2756
          for group, nodes in sorted(instance_groups.items(),
2757
                                     key=lambda (_, nodes): pnode in nodes,
2758
                                     reverse=True)]
2759

    
2760
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2761
                      instance, "instance has primary and secondary nodes in"
2762
                      " different groups: %s", utils.CommaJoin(pretty_list),
2763
                      code=self.ETYPE_WARNING)
2764

    
2765
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2766
        i_non_a_balanced.append(instance)
2767

    
2768
      for snode in inst_config.secondary_nodes:
2769
        s_img = node_image[snode]
2770
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2771
                 "instance %s, connection to secondary node failed", instance)
2772

    
2773
        if s_img.offline:
2774
          inst_nodes_offline.append(snode)
2775

    
2776
      # warn that the instance lives on offline nodes
2777
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2778
               "instance has offline secondary node(s) %s",
2779
               utils.CommaJoin(inst_nodes_offline))
2780
      # ... or ghost/non-vm_capable nodes
2781
      for node in inst_config.all_nodes:
2782
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2783
                 "instance lives on ghost node %s", node)
2784
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2785
                 instance, "instance lives on non-vm_capable node %s", node)
2786

    
2787
    feedback_fn("* Verifying orphan volumes")
2788
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2789

    
2790
    # We will get spurious "unknown volume" warnings if any node of this group
2791
    # is secondary for an instance whose primary is in another group. To avoid
2792
    # them, we find these instances and add their volumes to node_vol_should.
2793
    for inst in self.all_inst_info.values():
2794
      for secondary in inst.secondary_nodes:
2795
        if (secondary in self.my_node_info
2796
            and inst.name not in self.my_inst_info):
2797
          inst.MapLVsByNode(node_vol_should)
2798
          break
2799

    
2800
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2801

    
2802
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2803
      feedback_fn("* Verifying N+1 Memory redundancy")
2804
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2805

    
2806
    feedback_fn("* Other Notes")
2807
    if i_non_redundant:
2808
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2809
                  % len(i_non_redundant))
2810

    
2811
    if i_non_a_balanced:
2812
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2813
                  % len(i_non_a_balanced))
2814

    
2815
    if n_offline:
2816
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2817

    
2818
    if n_drained:
2819
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2820

    
2821
    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # manually override lu_result here, as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
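        # res.payload is a list of (script, status, output) tuples, one per
        # hook script run on the node; HKR_FAIL marks a failed script.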
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = 0

    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.glm.list_owned(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
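    # E.g. on a cluster with node groups "default" and "rack1" (illustrative
    # names), this submits two single-opcode jobs, one per group.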
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in
               self.glm.list_owned(locking.LEVEL_INSTANCE)
             for group_uuid in
               self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
    if owned_instances != wanted_instances:
      raise errors.OpPrereqError("Instances in node group %s changed since"
                                 " locks were acquired, wanted %s, have %s;"
                                 " retry the operation" %
                                 (self.op.group_name,
                                  utils.CommaJoin(wanted_instances),
                                  utils.CommaJoin(owned_instances)),
                                 errors.ECODE_STATE)

    # Get instance information
    self.instances = dict((name, self.cfg.GetInstanceInfo(name))
                          for name in owned_instances)

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
      if not owned_groups.issuperset(inst_groups):
        raise errors.OpPrereqError("Instance %s's node groups changed since"
                                   " locks were acquired, current groups are"
                                   " '%s', owning groups '%s'; retry the"
                                   " operation" %
                                   (instance_name,
                                    utils.CommaJoin(inst_groups),
                                    utils.CommaJoin(owned_groups)),
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
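    # Illustrative shape of the return value (all names made up):
    #   ({"node2.example.com": "rpc error"}, ["instance1"],
    #    {"instance2": [("node1.example.com", "xenvg/disk0_data")]})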
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
                                        for inst in self.instances.values()
                                        if inst.admin_up])

    if nv_dict:
      nodes = utils.NiceSort(set(self.glm.list_owned(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

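        # Each payload entry maps an LV name to a tuple whose third field
        # indicates whether the LV is online; the other fields are unused here.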
        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(key)

    return (res_nodes, list(res_instances), res_missing)


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
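        # The node reports the size in bytes; shift by 20 bits to get MiB,
        # the unit disk.size is stored in.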
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

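    # The master role is re-enabled in the finally clause below, so even a
    # failed configuration update leaves the master IP running again.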
    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given parameters don't conflict and that
    the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.glm.list_owned(locking.LEVEL_NODE)

    # if vg_name is not None, check the given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for node in node_list:
        ninfo = self.cfg.GetNodeInfo(node)
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
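    # objects.FillDict merges the second dict on top of a copy of the first;
    # with an empty override this just gives us a modifiable copy of the
    # current cluster hvparams.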
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Filenames must be unique
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

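  # Callers unpack this tuple positionally: (all nodes, optional everywhere,
  # master candidates only, vm_capable nodes only).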
  return (files_all, files_all_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (online_nodes, files_all_opt),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

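  # Poll loop: ask the primary node for the mirror status of all disks once
  # per iteration, report progress, and stop when everything is in sync (or
  # after a single pass when oneshot is set).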
  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

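  # Child devices are checked recursively; note that they are always checked
  # with the default ldisk=False, regardless of what the caller passed.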
  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for node_name in self.op.node_names:
      node = self.cfg.GetNodeInfo(node_name)

      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

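          # When powering on several nodes, pause between consecutive nodes;
          # no delay is needed after the last one.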
          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))

class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

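      # An OS is only reported as valid if its first entry on every node is
      # valid; variants, parameters and API versions are reduced to the
      # values common to all nodes.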
      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
4282
                       for (name, nresult) in node_data.items()
4283
                       if not nresult.fail_msg and nresult.payload)
4284
    else:
4285
      live_data = None
4286

    
4287
    if query.NQ_INST in self.requested_data:
4288
      node_to_primary = dict([(name, set()) for name in nodenames])
4289
      node_to_secondary = dict([(name, set()) for name in nodenames])
4290

    
4291
      inst_data = lu.cfg.GetAllInstancesInfo()
4292

    
4293
      for inst in inst_data.values():
4294
        if inst.primary_node in node_to_primary:
4295
          node_to_primary[inst.primary_node].add(inst.name)
4296
        for secnode in inst.secondary_nodes:
4297
          if secnode in node_to_secondary:
4298
            node_to_secondary[secnode].add(inst.name)
4299
    else:
4300
      node_to_primary = None
4301
      node_to_secondary = None
4302

    
4303
    if query.NQ_OOB in self.requested_data:
4304
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4305
                         for name, node in all_info.iteritems())
4306
    else:
4307
      oob_support = None
4308

    
4309
    if query.NQ_GROUP in self.requested_data:
4310
      groups = lu.cfg.GetAllNodeGroupsInfo()
4311
    else:
4312
      groups = {}
4313

    
4314
    return query.NodeQueryData([all_info[name] for name in nodenames],
4315
                               live_data, lu.cfg.GetMasterNode(),
4316
                               node_to_primary, node_to_secondary, groups,
4317
                               oob_support, lu.cfg.GetClusterInfo())
4318

    
4319

    
4320
class LUNodeQuery(NoHooksLU):
4321
  """Logical unit for querying nodes.
4322

4323
  """
4324
  # pylint: disable-msg=W0142
4325
  REQ_BGL = False
4326

    
4327
  def CheckArguments(self):
4328
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4329
                         self.op.output_fields, self.op.use_locking)
4330

    
4331
  def ExpandNames(self):
4332
    self.nq.ExpandNames(self)
4333

    
4334
  def Exec(self, feedback_fn):
4335
    return self.nq.OldStyleQuery(self)
4336

    
4337

    
4338
class LUNodeQueryvols(NoHooksLU):
4339
  """Logical unit for getting volumes on node(s).
4340

4341
  """
4342
  REQ_BGL = False
4343
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4344
  _FIELDS_STATIC = utils.FieldSet("node")
4345

    
4346
  def CheckArguments(self):
4347
    _CheckOutputFields(static=self._FIELDS_STATIC,
4348
                       dynamic=self._FIELDS_DYNAMIC,
4349
                       selected=self.op.output_fields)
4350

    
4351
  def ExpandNames(self):
4352
    self.needed_locks = {}
4353
    self.share_locks[locking.LEVEL_NODE] = 1
4354
    if not self.op.nodes:
4355
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4356
    else:
4357
      self.needed_locks[locking.LEVEL_NODE] = \
4358
        _GetWantedNodes(self, self.op.nodes)
4359

    
4360
  def Exec(self, feedback_fn):
4361
    """Computes the list of nodes and their attributes.
4362

4363
    """
4364
    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
4365
    volumes = self.rpc.call_node_volumes(nodenames)
4366

    
4367
    ilist = self.cfg.GetAllInstancesInfo()
4368
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4369

    
4370
    output = []
4371
    for node in nodenames:
4372
      nresult = volumes[node]
4373
      if nresult.offline:
4374
        continue
4375
      msg = nresult.fail_msg
4376
      if msg:
4377
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4378
        continue
4379

    
4380
      node_vols = sorted(nresult.payload,
4381
                         key=operator.itemgetter("dev"))
4382

    
      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
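
    # Illustrative note (not part of the original code): each entry appended
    # to "output" is a list of strings, one per requested field. For
    # output_fields ["node", "name", "size"] a row might look like
    # ["node1.example.com", "lv-data", "10240"]; the hostname and LV name
    # here are made up for illustration.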
4405

    
4406

    
4407
class LUNodeQueryStorage(NoHooksLU):
4408
  """Logical unit for getting information on storage units on node(s).
4409

4410
  """
4411
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4412
  REQ_BGL = False
4413

    
4414
  def CheckArguments(self):
4415
    _CheckOutputFields(static=self._FIELDS_STATIC,
4416
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4417
                       selected=self.op.output_fields)
4418

    
4419
  def ExpandNames(self):
4420
    self.needed_locks = {}
4421
    self.share_locks[locking.LEVEL_NODE] = 1
4422

    
4423
    if self.op.nodes:
4424
      self.needed_locks[locking.LEVEL_NODE] = \
4425
        _GetWantedNodes(self, self.op.nodes)
4426
    else:
4427
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4428

    
4429
  def Exec(self, feedback_fn):
4430
    """Computes the list of nodes and their attributes.
4431

4432
    """
4433
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
4434

    
4435
    # Always get name to sort by
4436
    if constants.SF_NAME in self.op.output_fields:
4437
      fields = self.op.output_fields[:]
4438
    else:
4439
      fields = [constants.SF_NAME] + self.op.output_fields
4440

    
4441
    # Never ask for node or type as it's only known to the LU
4442
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4443
      while extra in fields:
4444
        fields.remove(extra)
4445

    
4446
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4447
    name_idx = field_idx[constants.SF_NAME]
4448

    
4449
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4450
    data = self.rpc.call_storage_list(self.nodes,
4451
                                      self.op.storage_type, st_args,
4452
                                      self.op.name, fields)
4453

    
4454
    result = []
4455

    
4456
    for node in utils.NiceSort(self.nodes):
4457
      nresult = data[node]
4458
      if nresult.offline:
4459
        continue
4460

    
4461
      msg = nresult.fail_msg
4462
      if msg:
4463
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4464
        continue
4465

    
4466
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4467

    
4468
      for name in utils.NiceSort(rows.keys()):
4469
        row = rows[name]
4470

    
4471
        out = []
4472

    
4473
        for field in self.op.output_fields:
4474
          if field == constants.SF_NODE:
4475
            val = node
4476
          elif field == constants.SF_TYPE:
4477
            val = self.op.storage_type
4478
          elif field in field_idx:
4479
            val = row[field_idx[field]]
4480
          else:
4481
            raise errors.ParameterError(field)
4482

    
4483
          out.append(val)
4484

    
4485
        result.append(out)
4486

    
4487
    return result
4488

    
4489

    
4490
class _InstanceQuery(_QueryBase):
4491
  FIELDS = query.INSTANCE_FIELDS
4492

    
4493
  def ExpandNames(self, lu):
4494
    lu.needed_locks = {}
4495
    lu.share_locks = _ShareAll()
4496

    
4497
    if self.names:
4498
      self.wanted = _GetWantedInstances(lu, self.names)
4499
    else:
4500
      self.wanted = locking.ALL_SET
4501

    
4502
    self.do_locking = (self.use_locking and
4503
                       query.IQ_LIVE in self.requested_data)
4504
    if self.do_locking:
4505
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4506
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4507
      lu.needed_locks[locking.LEVEL_NODE] = []
4508
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4509

    
4510
    self.do_grouplocks = (self.do_locking and
4511
                          query.IQ_NODES in self.requested_data)
4512

    
4513
  def DeclareLocks(self, lu, level):
4514
    if self.do_locking:
4515
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4516
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4517

    
4518
        # Lock all groups used by instances optimistically; this requires going
4519
        # via the node before it's locked, requiring verification later on
4520
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4521
          set(group_uuid
4522
              for instance_name in
4523
                lu.glm.list_owned(locking.LEVEL_INSTANCE)
4524
              for group_uuid in
4525
                lu.cfg.GetInstanceNodeGroups(instance_name))
4526
      elif level == locking.LEVEL_NODE:
4527
        lu._LockInstancesNodes() # pylint: disable-msg=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.glm.list_owned(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.glm.list_owned(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst_groups = lu.cfg.GetInstanceNodeGroups(instance_name)
      if not owned_groups.issuperset(inst_groups):
        raise errors.OpPrereqError("Instance %s's node groups changed since"
                                   " locks were acquired, current groups"
                                   " are '%s', owning groups '%s'; retry the"
                                   " operation" %
                                   (instance_name,
                                    utils.CommaJoin(inst_groups),
                                    utils.CommaJoin(owned_groups)),
                                   errors.ECODE_STATE)

4547
  def _GetQueryData(self, lu):
4548
    """Computes the list of instances and their attributes.
4549

4550
    """
4551
    if self.do_grouplocks:
4552
      self._CheckGroupLocks(lu)
4553

    
4554
    cluster = lu.cfg.GetClusterInfo()
4555
    all_info = lu.cfg.GetAllInstancesInfo()
4556

    
4557
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4558

    
4559
    instance_list = [all_info[name] for name in instance_names]
4560
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4561
                                        for inst in instance_list)))
4562
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4563
    bad_nodes = []
4564
    offline_nodes = []
4565
    wrongnode_inst = set()
4566

    
4567
    # Gather data as requested
4568
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4569
      live_data = {}
4570
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4571
      for name in nodes:
4572
        result = node_data[name]
4573
        if result.offline:
4574
          # offline nodes will be in both lists
4575
          assert result.fail_msg
4576
          offline_nodes.append(name)
4577
        if result.fail_msg:
4578
          bad_nodes.append(name)
4579
        elif result.payload:
4580
          for inst in result.payload:
4581
            if inst in all_info:
4582
              if all_info[inst].primary_node == name:
4583
                live_data.update(result.payload)
4584
              else:
4585
                wrongnode_inst.add(inst)
4586
            else:
4587
              # orphan instance; we don't list it here as we don't
4588
              # handle this case yet in the output of instance listing
4589
              logging.warning("Orphan instance '%s' found on node %s",
4590
                              inst, name)
4591
        # else no instance is alive
4592
    else:
4593
      live_data = {}
4594

    
4595
    if query.IQ_DISKUSAGE in self.requested_data:
4596
      disk_usage = dict((inst.name,
4597
                         _ComputeDiskSize(inst.disk_template,
4598
                                          [{constants.IDISK_SIZE: disk.size}
4599
                                           for disk in inst.disks]))
4600
                        for inst in instance_list)
4601
    else:
4602
      disk_usage = None
4603

    
4604
    if query.IQ_CONSOLE in self.requested_data:
4605
      consinfo = {}
4606
      for inst in instance_list:
4607
        if inst.name in live_data:
4608
          # Instance is running
4609
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4610
        else:
4611
          consinfo[inst.name] = None
4612
      assert set(consinfo.keys()) == set(instance_names)
4613
    else:
4614
      consinfo = None
4615

    
4616
    if query.IQ_NODES in self.requested_data:
4617
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4618
                                            instance_list)))
4619
      nodes = dict((name, lu.cfg.GetNodeInfo(name)) for name in node_names)
4620
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4621
                    for uuid in set(map(operator.attrgetter("group"),
4622
                                        nodes.values())))
4623
    else:
4624
      nodes = None
4625
      groups = None
4626

    
4627
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4628
                                   disk_usage, offline_nodes, bad_nodes,
4629
                                   live_data, wrongnode_inst, consinfo,
4630
                                   nodes, groups)
4631

    
4632

    
4633
class LUQuery(NoHooksLU):
4634
  """Query for resources/items of a certain kind.
4635

4636
  """
4637
  # pylint: disable-msg=W0142
4638
  REQ_BGL = False
4639

    
4640
  def CheckArguments(self):
4641
    qcls = _GetQueryImplementation(self.op.what)
4642

    
4643
    self.impl = qcls(self.op.filter, self.op.fields, False)
4644

    
4645
  def ExpandNames(self):
4646
    self.impl.ExpandNames(self)
4647

    
4648
  def DeclareLocks(self, level):
4649
    self.impl.DeclareLocks(self, level)
4650

    
4651
  def Exec(self, feedback_fn):
4652
    return self.impl.NewStyleQuery(self)
4653

    
4654

    
4655
class LUQueryFields(NoHooksLU):
4656
  """Query for resources/items of a certain kind.
4657

4658
  """
4659
  # pylint: disable-msg=W0142
4660
  REQ_BGL = False
4661

    
4662
  def CheckArguments(self):
4663
    self.qcls = _GetQueryImplementation(self.op.what)
4664

    
4665
  def ExpandNames(self):
4666
    self.needed_locks = {}
4667

    
4668
  def Exec(self, feedback_fn):
4669
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4670

    
4671

    
4672
class LUNodeModifyStorage(NoHooksLU):
4673
  """Logical unit for modifying a storage volume on a node.
4674

4675
  """
4676
  REQ_BGL = False
4677

    
4678
  def CheckArguments(self):
4679
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4680

    
4681
    storage_type = self.op.storage_type
4682

    
4683
    try:
4684
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4685
    except KeyError:
4686
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4687
                                 " modified" % storage_type,
4688
                                 errors.ECODE_INVAL)
4689

    
4690
    diff = set(self.op.changes.keys()) - modifiable
4691
    if diff:
4692
      raise errors.OpPrereqError("The following fields can not be modified for"
4693
                                 " storage units of type '%s': %r" %
4694
                                 (storage_type, list(diff)),
4695
                                 errors.ECODE_INVAL)
4696

    
4697
  def ExpandNames(self):
4698
    self.needed_locks = {
4699
      locking.LEVEL_NODE: self.op.node_name,
4700
      }
4701

    
4702
  def Exec(self, feedback_fn):
4703
    """Computes the list of nodes and their attributes.
4704

4705
    """
4706
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4707
    result = self.rpc.call_storage_modify(self.op.node_name,
4708
                                          self.op.storage_type, st_args,
4709
                                          self.op.name, self.op.changes)
4710
    result.Raise("Failed to modify storage unit '%s' on %s" %
4711
                 (self.op.name, self.op.node_name))
4712

    
4713

    
4714
class LUNodeAdd(LogicalUnit):
4715
  """Logical unit for adding node to the cluster.
4716

4717
  """
4718
  HPATH = "node-add"
4719
  HTYPE = constants.HTYPE_NODE
4720
  _NFLAGS = ["master_capable", "vm_capable"]
4721

    
4722
  def CheckArguments(self):
4723
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4724
    # validate/normalize the node name
4725
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4726
                                         family=self.primary_ip_family)
4727
    self.op.node_name = self.hostname.name
4728

    
4729
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4730
      raise errors.OpPrereqError("Cannot readd the master node",
4731
                                 errors.ECODE_STATE)
4732

    
4733
    if self.op.readd and self.op.group:
4734
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4735
                                 " being readded", errors.ECODE_INVAL)
4736

    
4737
  def BuildHooksEnv(self):
4738
    """Build hooks env.
4739

4740
    This will run on all nodes before, and on all nodes + the new node after.
4741

4742
    """
4743
    return {
4744
      "OP_TARGET": self.op.node_name,
4745
      "NODE_NAME": self.op.node_name,
4746
      "NODE_PIP": self.op.primary_ip,
4747
      "NODE_SIP": self.op.secondary_ip,
4748
      "MASTER_CAPABLE": str(self.op.master_capable),
4749
      "VM_CAPABLE": str(self.op.vm_capable),
4750
      }
4751

    
4752
  def BuildHooksNodes(self):
4753
    """Build hooks nodes.
4754

4755
    """
4756
    # Exclude added node
4757
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4758
    post_nodes = pre_nodes + [self.op.node_name, ]
4759

    
4760
    return (pre_nodes, post_nodes)
4761

    
4762
  def CheckPrereq(self):
4763
    """Check prerequisites.
4764

4765
    This checks:
4766
     - the new node is not already in the config
4767
     - it is resolvable
4768
     - its parameters (single/dual homed) matches the cluster
4769

4770
    Any errors are signaled by raising errors.OpPrereqError.
4771

4772
    """
4773
    cfg = self.cfg
4774
    hostname = self.hostname
4775
    node = hostname.name
4776
    primary_ip = self.op.primary_ip = hostname.ip
4777
    if self.op.secondary_ip is None:
4778
      if self.primary_ip_family == netutils.IP6Address.family:
4779
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4780
                                   " IPv4 address must be given as secondary",
4781
                                   errors.ECODE_INVAL)
4782
      self.op.secondary_ip = primary_ip
4783

    
4784
    secondary_ip = self.op.secondary_ip
4785
    if not netutils.IP4Address.IsValid(secondary_ip):
4786
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4787
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4788

    
4789
    node_list = cfg.GetNodeList()
4790
    if not self.op.readd and node in node_list:
4791
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4792
                                 node, errors.ECODE_EXISTS)
4793
    elif self.op.readd and node not in node_list:
4794
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4795
                                 errors.ECODE_NOENT)
4796

    
4797
    self.changed_primary_ip = False
4798

    
4799
    for existing_node_name in node_list:
4800
      existing_node = cfg.GetNodeInfo(existing_node_name)
4801

    
4802
      if self.op.readd and node == existing_node_name:
4803
        if existing_node.secondary_ip != secondary_ip:
4804
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4805
                                     " address configuration as before",
4806
                                     errors.ECODE_INVAL)
4807
        if existing_node.primary_ip != primary_ip:
4808
          self.changed_primary_ip = True
4809

    
4810
        continue
4811

    
4812
      if (existing_node.primary_ip == primary_ip or
4813
          existing_node.secondary_ip == primary_ip or
4814
          existing_node.primary_ip == secondary_ip or
4815
          existing_node.secondary_ip == secondary_ip):
4816
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4817
                                   " existing node %s" % existing_node.name,
4818
                                   errors.ECODE_NOTUNIQUE)
4819

    
4820
    # After this 'if' block, None is no longer a valid value for the
4821
    # _capable op attributes
4822
    if self.op.readd:
4823
      old_node = self.cfg.GetNodeInfo(node)
4824
      assert old_node is not None, "Can't retrieve locked node %s" % node
4825
      for attr in self._NFLAGS:
4826
        if getattr(self.op, attr) is None:
4827
          setattr(self.op, attr, getattr(old_node, attr))
4828
    else:
4829
      for attr in self._NFLAGS:
4830
        if getattr(self.op, attr) is None:
4831
          setattr(self.op, attr, True)
4832

    
4833
    if self.op.readd and not self.op.vm_capable:
4834
      pri, sec = cfg.GetNodeInstances(node)
4835
      if pri or sec:
4836
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4837
                                   " flag set to false, but it already holds"
4838
                                   " instances" % node,
4839
                                   errors.ECODE_STATE)
4840

    
4841
    # check that the type of the node (single versus dual homed) is the
4842
    # same as for the master
4843
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4844
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4845
    newbie_singlehomed = secondary_ip == primary_ip
4846
    if master_singlehomed != newbie_singlehomed:
4847
      if master_singlehomed:
4848
        raise errors.OpPrereqError("The master has no secondary ip but the"
4849
                                   " new node has one",
4850
                                   errors.ECODE_INVAL)
4851
      else:
4852
        raise errors.OpPrereqError("The master has a secondary ip but the"
4853
                                   " new node doesn't have one",
4854
                                   errors.ECODE_INVAL)
4855

    
4856
    # checks reachability
4857
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4858
      raise errors.OpPrereqError("Node not reachable by ping",
4859
                                 errors.ECODE_ENVIRON)
4860

    
4861
    if not newbie_singlehomed:
4862
      # check reachability from my secondary ip to newbie's secondary ip
4863
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4864
                           source=myself.secondary_ip):
4865
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4866
                                   " based ping to node daemon port",
4867
                                   errors.ECODE_ENVIRON)
4868

    
4869
    if self.op.readd:
4870
      exceptions = [node]
4871
    else:
4872
      exceptions = []
4873

    
4874
    if self.op.master_capable:
4875
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4876
    else:
4877
      self.master_candidate = False
4878

    
4879
    if self.op.readd:
4880
      self.new_node = old_node
4881
    else:
4882
      node_group = cfg.LookupNodeGroup(self.op.group)
4883
      self.new_node = objects.Node(name=node,
4884
                                   primary_ip=primary_ip,
4885
                                   secondary_ip=secondary_ip,
4886
                                   master_candidate=self.master_candidate,
4887
                                   offline=False, drained=False,
4888
                                   group=node_group)
4889

    
4890
    if self.op.ndparams:
4891
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4892

    
4893
  def Exec(self, feedback_fn):
4894
    """Adds the new node to the cluster.
4895

4896
    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

4903
    # for re-adds, reset the offline/drained/master-candidate flags;
4904
    # we need to reset here, otherwise offline would prevent RPC calls
4905
    # later in the procedure; this also means that if the re-add
4906
    # fails, we are left with a non-offlined, broken node
4907
    if self.op.readd:
4908
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4909
      self.LogInfo("Readding a node, the offline/drained flags were reset")
4910
      # if we demote the node, we do cleanup later in the procedure
4911
      new_node.master_candidate = self.master_candidate
4912
      if self.changed_primary_ip:
4913
        new_node.primary_ip = self.op.primary_ip
4914

    
4915
    # copy the master/vm_capable flags
4916
    for attr in self._NFLAGS:
4917
      setattr(new_node, attr, getattr(self.op, attr))
4918

    
4919
    # notify the user about any possible mc promotion
4920
    if new_node.master_candidate:
4921
      self.LogInfo("Node will be a master candidate")
4922

    
4923
    if self.op.ndparams:
4924
      new_node.ndparams = self.op.ndparams
4925
    else:
4926
      new_node.ndparams = {}
4927

    
4928
    # check connectivity
4929
    result = self.rpc.call_version([node])[node]
4930
    result.Raise("Can't get version information from node %s" % node)
4931
    if constants.PROTOCOL_VERSION == result.payload:
4932
      logging.info("Communication to node %s fine, sw version %s match",
4933
                   node, result.payload)
4934
    else:
4935
      raise errors.OpExecError("Version mismatch master version %s,"
4936
                               " node version %s" %
4937
                               (constants.PROTOCOL_VERSION, result.payload))
4938

    
4939
    # Add node to our /etc/hosts, and add key to known_hosts
4940
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4941
      master_node = self.cfg.GetMasterNode()
4942
      result = self.rpc.call_etc_hosts_modify(master_node,
4943
                                              constants.ETC_HOSTS_ADD,
4944
                                              self.hostname.name,
4945
                                              self.hostname.ip)
4946
      result.Raise("Can't update hosts file with new host data")
4947

    
4948
    if new_node.secondary_ip != new_node.primary_ip:
4949
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4950
                               False)
4951

    
4952
    node_verify_list = [self.cfg.GetMasterNode()]
4953
    node_verify_param = {
4954
      constants.NV_NODELIST: [node],
4955
      # TODO: do a node-net-test as well?
4956
    }
4957

    
4958
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4959
                                       self.cfg.GetClusterName())
4960
    for verifier in node_verify_list:
4961
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
4962
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
4963
      if nl_payload:
4964
        for failed in nl_payload:
4965
          feedback_fn("ssh/hostname verification failed"
4966
                      " (checking from %s): %s" %
4967
                      (verifier, nl_payload[failed]))
4968
        raise errors.OpExecError("ssh/hostname verification failed")
4969

    
4970
    if self.op.readd:
4971
      _RedistributeAncillaryFiles(self)
4972
      self.context.ReaddNode(new_node)
4973
      # make sure we redistribute the config
4974
      self.cfg.Update(new_node, feedback_fn)
4975
      # and make sure the new node will not have old files around
4976
      if not new_node.master_candidate:
4977
        result = self.rpc.call_node_demote_from_mc(new_node.name)
4978
        msg = result.fail_msg
4979
        if msg:
4980
          self.LogWarning("Node failed to demote itself from master"
4981
                          " candidate status: %s" % msg)
4982
    else:
4983
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
4984
                                  additional_vm=self.op.vm_capable)
4985
      self.context.AddNode(new_node, self.proc.GetECId())
4986

    
4987

    
4988
class LUNodeSetParams(LogicalUnit):
4989
  """Modifies the parameters of a node.
4990

4991
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4992
      to the node role (as _ROLE_*)
4993
  @cvar _R2F: a dictionary from node role to tuples of flags
4994
  @cvar _FLAGS: a list of attribute names corresponding to the flags
4995

4996
  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]
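
  # Illustrative note (not part of the original code): the flag tuples above
  # are ordered as (master_candidate, drained, offline), matching _FLAGS, so
  #   _F2R[(False, True, False)] == _ROLE_DRAINED
  #   _R2F[_ROLE_DRAINED] == (False, True, False)
  # Combinations with more than one flag set have no entry in _F2R and would
  # trip the "Un-handled old flags" assertion in CheckPrereq.
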
5010
  def CheckArguments(self):
5011
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5012
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5013
                self.op.master_capable, self.op.vm_capable,
5014
                self.op.secondary_ip, self.op.ndparams]
5015
    if all_mods.count(None) == len(all_mods):
5016
      raise errors.OpPrereqError("Please pass at least one modification",
5017
                                 errors.ECODE_INVAL)
5018
    if all_mods.count(True) > 1:
5019
      raise errors.OpPrereqError("Can't set the node into more than one"
5020
                                 " state at the same time",
5021
                                 errors.ECODE_INVAL)
5022

    
5023
    # Boolean value that tells us whether we might be demoting from MC
5024
    self.might_demote = (self.op.master_candidate == False or
5025
                         self.op.offline == True or
5026
                         self.op.drained == True or
5027
                         self.op.master_capable == False)
5028

    
5029
    if self.op.secondary_ip:
5030
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5031
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5032
                                   " address" % self.op.secondary_ip,
5033
                                   errors.ECODE_INVAL)
5034

    
5035
    self.lock_all = self.op.auto_promote and self.might_demote
5036
    self.lock_instances = self.op.secondary_ip is not None
5037

    
5038
  def ExpandNames(self):
5039
    if self.lock_all:
5040
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5041
    else:
5042
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5043

    
5044
    if self.lock_instances:
5045
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5046

    
5047
  def DeclareLocks(self, level):
5048
    # If we have locked all instances, before waiting to lock nodes, release
5049
    # all the ones living on nodes unrelated to the current operation.
5050
    if level == locking.LEVEL_NODE and self.lock_instances:
5051
      self.affected_instances = []
5052
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5053
        instances_keep = []
5054

    
5055
        # Build list of instances to release
5056
        for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
5057
          instance = self.context.cfg.GetInstanceInfo(instance_name)
5058
          if (instance.disk_template in constants.DTS_INT_MIRROR and
5059
              self.op.node_name in instance.all_nodes):
5060
            instances_keep.append(instance_name)
5061
            self.affected_instances.append(instance)
5062

    
5063
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5064

    
5065
        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
5066
                set(instances_keep))
5067

    
5068
  def BuildHooksEnv(self):
5069
    """Build hooks env.
5070

5071
    This runs on the master node.
5072

5073
    """
5074
    return {
5075
      "OP_TARGET": self.op.node_name,
5076
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5077
      "OFFLINE": str(self.op.offline),
5078
      "DRAINED": str(self.op.drained),
5079
      "MASTER_CAPABLE": str(self.op.master_capable),
5080
      "VM_CAPABLE": str(self.op.vm_capable),
5081
      }
5082

    
5083
  def BuildHooksNodes(self):
5084
    """Build hooks nodes.
5085

5086
    """
5087
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5088
    return (nl, nl)
5089

    
5090
  def CheckPrereq(self):
5091
    """Check prerequisites.
5092

5093
    This only checks the instance list against the existing names.
5094

5095
    """
5096
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5097

    
5098
    if (self.op.master_candidate is not None or
5099
        self.op.drained is not None or
5100
        self.op.offline is not None):
5101
      # we can't change the master's node flags
5102
      if self.op.node_name == self.cfg.GetMasterNode():
5103
        raise errors.OpPrereqError("The master role can be changed"
5104
                                   " only via master-failover",
5105
                                   errors.ECODE_INVAL)
5106

    
5107
    if self.op.master_candidate and not node.master_capable:
5108
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5109
                                 " it a master candidate" % node.name,
5110
                                 errors.ECODE_STATE)
5111

    
5112
    if self.op.vm_capable == False:
5113
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5114
      if ipri or isec:
5115
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5116
                                   " the vm_capable flag" % node.name,
5117
                                   errors.ECODE_STATE)
5118

    
5119
    if node.master_candidate and self.might_demote and not self.lock_all:
5120
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5121
      # check if after removing the current node, we're missing master
5122
      # candidates
5123
      (mc_remaining, mc_should, _) = \
5124
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5125
      if mc_remaining < mc_should:
5126
        raise errors.OpPrereqError("Not enough master candidates, please"
5127
                                   " pass auto promote option to allow"
5128
                                   " promotion", errors.ECODE_STATE)
5129

    
5130
    self.old_flags = old_flags = (node.master_candidate,
5131
                                  node.drained, node.offline)
5132
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5133
    self.old_role = old_role = self._F2R[old_flags]
5134

    
5135
    # Check for ineffective changes
5136
    for attr in self._FLAGS:
5137
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5138
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5139
        setattr(self.op, attr, None)
5140

    
5141
    # Past this point, any flag change to False means a transition
5142
    # away from the respective state, as only real changes are kept
5143

    
5144
    # TODO: We might query the real power state if it supports OOB
5145
    if _SupportsOob(self.cfg, node):
5146
      if self.op.offline is False and not (node.powered or
5147
                                           self.op.powered == True):
5148
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5149
                                    " offline status can be reset") %
5150
                                   self.op.node_name)
5151
    elif self.op.powered is not None:
5152
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5153
                                  " as it does not support out-of-band"
5154
                                  " handling") % self.op.node_name)
5155

    
5156
    # If we're being deofflined/drained, we'll MC ourself if needed
5157
    if (self.op.drained == False or self.op.offline == False or
5158
        (self.op.master_capable and not node.master_capable)):
5159
      if _DecideSelfPromotion(self):
5160
        self.op.master_candidate = True
5161
        self.LogInfo("Auto-promoting node to master candidate")
5162

    
5163
    # If we're no longer master capable, we'll demote ourselves from MC
5164
    if self.op.master_capable == False and node.master_candidate:
5165
      self.LogInfo("Demoting from master candidate")
5166
      self.op.master_candidate = False
5167

    
5168
    # Compute new role
5169
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5170
    if self.op.master_candidate:
5171
      new_role = self._ROLE_CANDIDATE
5172
    elif self.op.drained:
5173
      new_role = self._ROLE_DRAINED
5174
    elif self.op.offline:
5175
      new_role = self._ROLE_OFFLINE
5176
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5177
      # False is still in new flags, which means we're un-setting (the
5178
      # only) True flag
5179
      new_role = self._ROLE_REGULAR
5180
    else: # no new flags, nothing, keep old role
5181
      new_role = old_role
5182

    
5183
    self.new_role = new_role
5184

    
5185
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5186
      # Trying to transition out of offline status
5187
      result = self.rpc.call_version([node.name])[node.name]
5188
      if result.fail_msg:
5189
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5190
                                   " to report its version: %s" %
5191
                                   (node.name, result.fail_msg),
5192
                                   errors.ECODE_STATE)
5193
      else:
5194
        self.LogWarning("Transitioning node from offline to online state"
5195
                        " without using re-add. Please make sure the node"
5196
                        " is healthy!")
5197

    
5198
    if self.op.secondary_ip:
5199
      # Ok even without locking, because this can't be changed by any LU
5200
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5201
      master_singlehomed = master.secondary_ip == master.primary_ip
5202
      if master_singlehomed and self.op.secondary_ip:
5203
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5204
                                   " homed cluster", errors.ECODE_INVAL)
5205

    
5206
      if node.offline:
5207
        if self.affected_instances:
5208
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5209
                                     " node has instances (%s) configured"
5210
                                     " to use it" % self.affected_instances)
5211
      else:
5212
        # On online nodes, check that no instances are running, and that
5213
        # the node has the new ip and we can reach it.
5214
        for instance in self.affected_instances:
5215
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5216

    
5217
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5218
        if master.name != node.name:
5219
          # check reachability from master secondary ip to new secondary ip
5220
          if not netutils.TcpPing(self.op.secondary_ip,
5221
                                  constants.DEFAULT_NODED_PORT,
5222
                                  source=master.secondary_ip):
5223
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5224
                                       " based ping to node daemon port",
5225
                                       errors.ECODE_ENVIRON)
5226

    
5227
    if self.op.ndparams:
5228
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5229
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5230
      self.new_ndparams = new_ndparams
5231

    
5232
  def Exec(self, feedback_fn):
5233
    """Modifies a node.
5234

5235
    """
5236
    node = self.node
5237
    old_role = self.old_role
5238
    new_role = self.new_role
5239

    
5240
    result = []
5241

    
5242
    if self.op.ndparams:
5243
      node.ndparams = self.new_ndparams
5244

    
5245
    if self.op.powered is not None:
5246
      node.powered = self.op.powered
5247

    
5248
    for attr in ["master_capable", "vm_capable"]:
5249
      val = getattr(self.op, attr)
5250
      if val is not None:
5251
        setattr(node, attr, val)
5252
        result.append((attr, str(val)))
5253

    
5254
    if new_role != old_role:
5255
      # Tell the node to demote itself, if no longer MC and not offline
5256
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5257
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5258
        if msg:
5259
          self.LogWarning("Node failed to demote itself: %s", msg)
5260

    
5261
      new_flags = self._R2F[new_role]
5262
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5263
        if of != nf:
5264
          result.append((desc, str(nf)))
5265
      (node.master_candidate, node.drained, node.offline) = new_flags
5266

    
5267
      # we locked all nodes, we adjust the CP before updating this node
5268
      if self.lock_all:
5269
        _AdjustCandidatePool(self, [node.name])
5270

    
5271
    if self.op.secondary_ip:
5272
      node.secondary_ip = self.op.secondary_ip
5273
      result.append(("secondary_ip", self.op.secondary_ip))
5274

    
5275
    # this will trigger configuration file update, if needed
5276
    self.cfg.Update(node, feedback_fn)
5277

    
5278
    # this will trigger job queue propagation or cleanup if the mc
5279
    # flag changed
5280
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5281
      self.context.ReaddNode(node)
5282

    
5283
    return result
5284

    
5285

    
5286
class LUNodePowercycle(NoHooksLU):
5287
  """Powercycles a node.
5288

5289
  """
5290
  REQ_BGL = False
5291

    
5292
  def CheckArguments(self):
5293
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5294
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5295
      raise errors.OpPrereqError("The node is the master and the force"
5296
                                 " parameter was not set",
5297
                                 errors.ECODE_INVAL)
5298

    
5299
  def ExpandNames(self):
5300
    """Locking for PowercycleNode.
5301

5302
    This is a last-resort option and shouldn't block on other
5303
    jobs. Therefore, we grab no locks.
5304

5305
    """
5306
    self.needed_locks = {}
5307

    
5308
  def Exec(self, feedback_fn):
5309
    """Reboots a node.
5310

5311
    """
5312
    result = self.rpc.call_node_powercycle(self.op.node_name,
5313
                                           self.cfg.GetHypervisorType())
5314
    result.Raise("Failed to schedule the reboot")
5315
    return result.payload
5316

    
5317

    
5318
class LUClusterQuery(NoHooksLU):
5319
  """Query cluster configuration.
5320

5321
  """
5322
  REQ_BGL = False
5323

    
5324
  def ExpandNames(self):
5325
    self.needed_locks = {}
5326

    
5327
  def Exec(self, feedback_fn):
5328
    """Return cluster config.
5329

5330
    """
5331
    cluster = self.cfg.GetClusterInfo()
5332
    os_hvp = {}
5333

    
5334
    # Filter just for enabled hypervisors
5335
    for os_name, hv_dict in cluster.os_hvp.items():
5336
      os_hvp[os_name] = {}
5337
      for hv_name, hv_params in hv_dict.items():
5338
        if hv_name in cluster.enabled_hypervisors:
5339
          os_hvp[os_name][hv_name] = hv_params
5340

    
5341
    # Convert ip_family to ip_version
5342
    primary_ip_version = constants.IP4_VERSION
5343
    if cluster.primary_ip_family == netutils.IP6Address.family:
5344
      primary_ip_version = constants.IP6_VERSION
5345

    
5346
    result = {
5347
      "software_version": constants.RELEASE_VERSION,
5348
      "protocol_version": constants.PROTOCOL_VERSION,
5349
      "config_version": constants.CONFIG_VERSION,
5350
      "os_api_version": max(constants.OS_API_VERSIONS),
5351
      "export_version": constants.EXPORT_VERSION,
5352
      "architecture": (platform.architecture()[0], platform.machine()),
5353
      "name": cluster.cluster_name,
5354
      "master": cluster.master_node,
5355
      "default_hypervisor": cluster.enabled_hypervisors[0],
5356
      "enabled_hypervisors": cluster.enabled_hypervisors,
5357
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5358
                        for hypervisor_name in cluster.enabled_hypervisors]),
5359
      "os_hvp": os_hvp,
5360
      "beparams": cluster.beparams,
5361
      "osparams": cluster.osparams,
5362
      "nicparams": cluster.nicparams,
5363
      "ndparams": cluster.ndparams,
5364
      "candidate_pool_size": cluster.candidate_pool_size,
5365
      "master_netdev": cluster.master_netdev,
5366
      "volume_group_name": cluster.volume_group_name,
5367
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5368
      "file_storage_dir": cluster.file_storage_dir,
5369
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5370
      "maintain_node_health": cluster.maintain_node_health,
5371
      "ctime": cluster.ctime,
5372
      "mtime": cluster.mtime,
5373
      "uuid": cluster.uuid,
5374
      "tags": list(cluster.GetTags()),
5375
      "uid_pool": cluster.uid_pool,
5376
      "default_iallocator": cluster.default_iallocator,
5377
      "reserved_lvs": cluster.reserved_lvs,
5378
      "primary_ip_version": primary_ip_version,
5379
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5380
      "hidden_os": cluster.hidden_os,
5381
      "blacklisted_os": cluster.blacklisted_os,
5382
      }
5383

    
5384
    return result
5385

    
5386

    
5387
class LUClusterConfigQuery(NoHooksLU):
5388
  """Return configuration values.
5389

5390
  """
5391
  REQ_BGL = False
5392
  _FIELDS_DYNAMIC = utils.FieldSet()
5393
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5394
                                  "watcher_pause", "volume_group_name")
5395

    
5396
  def CheckArguments(self):
5397
    _CheckOutputFields(static=self._FIELDS_STATIC,
5398
                       dynamic=self._FIELDS_DYNAMIC,
5399
                       selected=self.op.output_fields)
5400

    
5401
  def ExpandNames(self):
5402
    self.needed_locks = {}
5403

    
5404
  def Exec(self, feedback_fn):
5405
    """Dump a representation of the cluster config to the standard output.
5406

5407
    """
5408
    values = []
5409
    for field in self.op.output_fields:
5410
      if field == "cluster_name":
5411
        entry = self.cfg.GetClusterName()
5412
      elif field == "master_node":
5413
        entry = self.cfg.GetMasterNode()
5414
      elif field == "drain_flag":
5415
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5416
      elif field == "watcher_pause":
5417
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5418
      elif field == "volume_group_name":
5419
        entry = self.cfg.GetVGName()
5420
      else:
5421
        raise errors.ParameterError(field)
5422
      values.append(entry)
5423
    return values
5424

    
5425

    
5426
class LUInstanceActivateDisks(NoHooksLU):
5427
  """Bring up an instance's disks.
5428

5429
  """
5430
  REQ_BGL = False
5431

    
5432
  def ExpandNames(self):
5433
    self._ExpandAndLockInstance()
5434
    self.needed_locks[locking.LEVEL_NODE] = []
5435
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5436

    
5437
  def DeclareLocks(self, level):
5438
    if level == locking.LEVEL_NODE:
5439
      self._LockInstancesNodes()
5440

    
5441
  def CheckPrereq(self):
5442
    """Check prerequisites.
5443

5444
    This checks that the instance is in the cluster.
5445

5446
    """
5447
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5448
    assert self.instance is not None, \
5449
      "Cannot retrieve locked instance %s" % self.op.instance_name
5450
    _CheckNodeOnline(self, self.instance.primary_node)
5451

    
5452
  def Exec(self, feedback_fn):
5453
    """Activate the disks.
5454

5455
    """
5456
    disks_ok, disks_info = \
5457
              _AssembleInstanceDisks(self, self.instance,
5458
                                     ignore_size=self.op.ignore_size)
5459
    if not disks_ok:
5460
      raise errors.OpExecError("Cannot activate block devices")
5461

    
5462
    return disks_info
5463

    
5464

    
5465
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5466
                           ignore_size=False):
5467
  """Prepare the block devices for an instance.
5468

5469
  This sets up the block devices on all nodes.
5470

5471
  @type lu: L{LogicalUnit}
5472
  @param lu: the logical unit on whose behalf we execute
5473
  @type instance: L{objects.Instance}
5474
  @param instance: the instance for whose disks we assemble
5475
  @type disks: list of L{objects.Disk} or None
5476
  @param disks: which disks to assemble (or all, if None)
5477
  @type ignore_secondaries: boolean
5478
  @param ignore_secondaries: if true, errors on secondary nodes
5479
      won't result in an error return from the function
5480
  @type ignore_size: boolean
5481
  @param ignore_size: if true, the current known size of the disk
5482
      will not be used during the disk activation, useful for cases
5483
      when the size is wrong
5484
  @return: a tuple of (disks_ok, device_info), where device_info is a list
      of (host, instance_visible_name, node_visible_name) tuples with the
      mapping from node devices to instance devices
5487

5488
  """
5489
  device_info = []
5490
  disks_ok = True
5491
  iname = instance.name
5492
  disks = _ExpandCheckDisks(instance, disks)
5493

    
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

5503
  # 1st pass, assemble on all nodes in secondary mode
5504
  for idx, inst_disk in enumerate(disks):
5505
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5506
      if ignore_size:
5507
        node_disk = node_disk.Copy()
5508
        node_disk.UnsetSize()
5509
      lu.cfg.SetDiskID(node_disk, node)
5510
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5511
      msg = result.fail_msg
5512
      if msg:
5513
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5514
                           " (is_primary=False, pass=1): %s",
5515
                           inst_disk.iv_name, node, msg)
5516
        if not ignore_secondaries:
5517
          disks_ok = False
5518

    
5519
  # FIXME: race condition on drbd migration to primary
5520

    
5521
  # 2nd pass, do only the primary node
5522
  for idx, inst_disk in enumerate(disks):
5523
    dev_path = None
5524

    
5525
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5526
      if node != instance.primary_node:
5527
        continue
5528
      if ignore_size:
5529
        node_disk = node_disk.Copy()
5530
        node_disk.UnsetSize()
5531
      lu.cfg.SetDiskID(node_disk, node)
5532
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5533
      msg = result.fail_msg
5534
      if msg:
5535
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5536
                           " (is_primary=True, pass=2): %s",
5537
                           inst_disk.iv_name, node, msg)
5538
        disks_ok = False
5539
      else:
5540
        dev_path = result.payload
5541

    
5542
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5543

    
5544
  # leave the disks configured for the primary node
5545
  # this is a workaround that would be fixed better by
5546
  # improving the logical/physical id handling
5547
  for disk in disks:
5548
    lu.cfg.SetDiskID(disk, instance.primary_node)
5549

    
5550
  return disks_ok, device_info
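
# Illustrative note (not part of the original code): a typical caller unpacks
# the result as _StartInstanceDisks below does, e.g.
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     ...
# where each device_info entry is (primary_node, disk.iv_name, dev_path),
# with dev_path being None when assembly on the primary node failed.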
5551

    
5552

    
5553
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is not running before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are
  ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
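        # Errors on the primary node make the whole operation fail unless
        # ignore_primary is set; errors on secondaries only count if the
        # node is not already marked offline.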
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5660
  """Checks if a node has enough free memory.
5661

5662
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5666

5667
  @type lu: C{LogicalUnit}
5668
  @param lu: a logical unit from which we get configuration data
5669
  @type node: C{str}
5670
  @param node: the node to check
5671
  @type reason: C{str}
5672
  @param reason: string to use in the error message
5673
  @type requested: C{int}
5674
  @param requested: the amount of memory in MiB to check for
5675
  @type hypervisor_name: C{str}
5676
  @param hypervisor_name: the hypervisor to ask for memory stats
5677
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5678
      we cannot check the node
5679

5680
  """
5681
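  # ask the node for its memory state as seen by the given hypervisor;
  # the "memory_free" field of the payload is checked below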
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5682
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5683
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5684
  free_mem = nodeinfo[node].payload.get("memory_free", None)
5685
  if not isinstance(free_mem, int):
5686
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5687
                               " was '%s'" % (node, free_mem),
5688
                               errors.ECODE_ENVIRON)
5689
  if requested > free_mem:
5690
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5691
                               " needed %s MiB, available %s MiB" %
5692
                               (node, reason, requested, free_mem),
5693
                               errors.ECODE_NORES)
5694

    
5695

    
5696
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5697
  """Checks if nodes have enough free disk space in the all VGs.
5698

5699
  This function check if all given nodes have the needed amount of
5700
  free disk. In case any node has less disk or we cannot get the
5701
  information from the node, this function raise an OpPrereqError
5702
  exception.
5703

5704
  @type lu: C{LogicalUnit}
5705
  @param lu: a logical unit from which we get configuration data
5706
  @type nodenames: C{list}
5707
  @param nodenames: the list of node names to check
5708
  @type req_sizes: C{dict}
5709
  @param req_sizes: the hash of vg and corresponding amount of disk in
5710
      MiB to check for
5711
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5712
      or we cannot check the node
5713

5714
  """
5715
  for vg, req_size in req_sizes.items():
5716
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5717

    
5718

    
5719
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5720
  """Checks if nodes have enough free disk space in the specified VG.
5721

5722
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5726

5727
  @type lu: C{LogicalUnit}
5728
  @param lu: a logical unit from which we get configuration data
5729
  @type nodenames: C{list}
5730
  @param nodenames: the list of node names to check
5731
  @type vg: C{str}
5732
  @param vg: the volume group to check
5733
  @type requested: C{int}
5734
  @param requested: the amount of disk in MiB to check for
5735
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5736
      or we cannot check the node
5737

5738
  """
5739
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5740
  for node in nodenames:
5741
    info = nodeinfo[node]
5742
    info.Raise("Cannot get current information from node %s" % node,
5743
               prereq=True, ecode=errors.ECODE_ENVIRON)
5744
    vg_free = info.payload.get("vg_free", None)
5745
    if not isinstance(vg_free, int):
5746
      raise errors.OpPrereqError("Can't compute free disk space on node"
5747
                                 " %s for vg %s, result was '%s'" %
5748
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
5749
    if requested > vg_free:
5750
      raise errors.OpPrereqError("Not enough disk space on target node %s"
5751
                                 " vg %s: required %d MiB, available %d MiB" %
5752
                                 (node, vg, requested, vg_free),
5753
                                 errors.ECODE_NORES)
5754

    
5755

    
5756
class LUInstanceStartup(LogicalUnit):
5757
  """Starts an instance.
5758

5759
  """
5760
  HPATH = "instance-start"
5761
  HTYPE = constants.HTYPE_INSTANCE
5762
  REQ_BGL = False
5763

    
5764
  def CheckArguments(self):
5765
    # extra beparams
5766
    if self.op.beparams:
5767
      # fill the beparams dict
5768
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5769

    
5770
  def ExpandNames(self):
5771
    self._ExpandAndLockInstance()
5772

    
5773
  def BuildHooksEnv(self):
5774
    """Build hooks env.
5775

5776
    This runs on master, primary and secondary nodes of the instance.
5777

5778
    """
5779
    env = {
5780
      "FORCE": self.op.force,
5781
      }
5782

    
5783
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5784

    
5785
    return env
5786

    
5787
  def BuildHooksNodes(self):
5788
    """Build hooks nodes.
5789

5790
    """
5791
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5792
    return (nl, nl)
5793

    
5794
  def CheckPrereq(self):
5795
    """Check prerequisites.
5796

5797
    This checks that the instance is in the cluster.
5798

5799
    """
5800
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5801
    assert self.instance is not None, \
5802
      "Cannot retrieve locked instance %s" % self.op.instance_name
5803

    
5804
    # extra hvparams
5805
    if self.op.hvparams:
5806
      # check hypervisor parameter syntax (locally)
5807
      cluster = self.cfg.GetClusterInfo()
5808
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5809
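      # merge the cluster defaults with the per-instance values and the
      # overrides from the opcode, then validate the resulting hypervisor
      # parameters both locally and on the instance's nodes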
      filled_hvp = cluster.FillHV(instance)
5810
      filled_hvp.update(self.op.hvparams)
5811
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5812
      hv_type.CheckParameterSyntax(filled_hvp)
5813
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5814

    
5815
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5816

    
5817
    if self.primary_offline and self.op.ignore_offline_nodes:
5818
      self.proc.LogWarning("Ignoring offline primary node")
5819

    
5820
      if self.op.hvparams or self.op.beparams:
5821
        self.proc.LogWarning("Overridden parameters are ignored")
5822
    else:
5823
      _CheckNodeOnline(self, instance.primary_node)
5824

    
5825
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5826

    
5827
      # check bridges existence
5828
      _CheckInstanceBridgesExist(self, instance)
5829

    
5830
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5831
                                                instance.name,
5832
                                                instance.hypervisor)
5833
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5834
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5835
      if not remote_info.payload: # not running already
5836
        _CheckNodeFreeMemory(self, instance.primary_node,
5837
                             "starting instance %s" % instance.name,
5838
                             bep[constants.BE_MEMORY], instance.hypervisor)
5839

    
5840
  def Exec(self, feedback_fn):
5841
    """Start the instance.
5842

5843
    """
5844
    instance = self.instance
5845
    force = self.op.force
5846

    
5847
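    # record the desired "up" state in the configuration first, unless the
    # caller asked us not to remember this state change (no_remember)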
    if not self.op.no_remember:
5848
      self.cfg.MarkInstanceUp(instance.name)
5849

    
5850
    if self.primary_offline:
5851
      assert self.op.ignore_offline_nodes
5852
      self.proc.LogInfo("Primary node offline, marked instance as started")
5853
    else:
5854
      node_current = instance.primary_node
5855

    
5856
      _StartInstanceDisks(self, instance, force)
5857

    
5858
      result = self.rpc.call_instance_start(node_current, instance,
5859
                                            self.op.hvparams, self.op.beparams,
5860
                                            self.op.startup_paused)
5861
      msg = result.fail_msg
5862
      if msg:
5863
        _ShutdownInstanceDisks(self, instance)
5864
        raise errors.OpExecError("Could not start instance: %s" % msg)
5865

    
5866

    
5867
class LUInstanceReboot(LogicalUnit):
5868
  """Reboot an instance.
5869

5870
  """
5871
  HPATH = "instance-reboot"
5872
  HTYPE = constants.HTYPE_INSTANCE
5873
  REQ_BGL = False
5874

    
5875
  def ExpandNames(self):
5876
    self._ExpandAndLockInstance()
5877

    
5878
  def BuildHooksEnv(self):
5879
    """Build hooks env.
5880

5881
    This runs on master, primary and secondary nodes of the instance.
5882

5883
    """
5884
    env = {
5885
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5886
      "REBOOT_TYPE": self.op.reboot_type,
5887
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5888
      }
5889

    
5890
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5891

    
5892
    return env
5893

    
5894
  def BuildHooksNodes(self):
5895
    """Build hooks nodes.
5896

5897
    """
5898
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5899
    return (nl, nl)
5900

    
5901
  def CheckPrereq(self):
5902
    """Check prerequisites.
5903

5904
    This checks that the instance is in the cluster.
5905

5906
    """
5907
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5908
    assert self.instance is not None, \
5909
      "Cannot retrieve locked instance %s" % self.op.instance_name
5910

    
5911
    _CheckNodeOnline(self, instance.primary_node)
5912

    
5913
    # check bridges existence
5914
    _CheckInstanceBridgesExist(self, instance)
5915

    
5916
  def Exec(self, feedback_fn):
5917
    """Reboot the instance.
5918

5919
    """
5920
    instance = self.instance
5921
    ignore_secondaries = self.op.ignore_secondaries
5922
    reboot_type = self.op.reboot_type
5923

    
5924
    remote_info = self.rpc.call_instance_info(instance.primary_node,
5925
                                              instance.name,
5926
                                              instance.hypervisor)
5927
    remote_info.Raise("Error checking node %s" % instance.primary_node)
5928
    instance_running = bool(remote_info.payload)
5929

    
5930
    node_current = instance.primary_node
5931

    
5932
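    # soft and hard reboots of a running instance are delegated to the
    # hypervisor; everything else is handled as a full shutdown plus start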
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5933
                                            constants.INSTANCE_REBOOT_HARD]:
5934
      for disk in instance.disks:
5935
        self.cfg.SetDiskID(disk, node_current)
5936
      result = self.rpc.call_instance_reboot(node_current, instance,
5937
                                             reboot_type,
5938
                                             self.op.shutdown_timeout)
5939
      result.Raise("Could not reboot instance")
5940
    else:
5941
      if instance_running:
5942
        result = self.rpc.call_instance_shutdown(node_current, instance,
5943
                                                 self.op.shutdown_timeout)
5944
        result.Raise("Could not shutdown instance for full reboot")
5945
        _ShutdownInstanceDisks(self, instance)
5946
      else:
5947
        self.LogInfo("Instance %s was already stopped, starting now",
5948
                     instance.name)
5949
      _StartInstanceDisks(self, instance, ignore_secondaries)
5950
      result = self.rpc.call_instance_start(node_current, instance,
5951
                                            None, None, False)
5952
      msg = result.fail_msg
5953
      if msg:
5954
        _ShutdownInstanceDisks(self, instance)
5955
        raise errors.OpExecError("Could not start instance for"
5956
                                 " full reboot: %s" % msg)
5957

    
5958
    self.cfg.MarkInstanceUp(instance.name)
5959

    
5960

    
5961
class LUInstanceShutdown(LogicalUnit):
5962
  """Shutdown an instance.
5963

5964
  """
5965
  HPATH = "instance-stop"
5966
  HTYPE = constants.HTYPE_INSTANCE
5967
  REQ_BGL = False
5968

    
5969
  def ExpandNames(self):
5970
    self._ExpandAndLockInstance()
5971

    
5972
  def BuildHooksEnv(self):
5973
    """Build hooks env.
5974

5975
    This runs on master, primary and secondary nodes of the instance.
5976

5977
    """
5978
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5979
    env["TIMEOUT"] = self.op.timeout
5980
    return env
5981

    
5982
  def BuildHooksNodes(self):
5983
    """Build hooks nodes.
5984

5985
    """
5986
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5987
    return (nl, nl)
5988

    
5989
  def CheckPrereq(self):
5990
    """Check prerequisites.
5991

5992
    This checks that the instance is in the cluster.
5993

5994
    """
5995
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5996
    assert self.instance is not None, \
5997
      "Cannot retrieve locked instance %s" % self.op.instance_name
5998

    
5999
    self.primary_offline = \
6000
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6001

    
6002
    if self.primary_offline and self.op.ignore_offline_nodes:
6003
      self.proc.LogWarning("Ignoring offline primary node")
6004
    else:
6005
      _CheckNodeOnline(self, self.instance.primary_node)
6006

    
6007
  def Exec(self, feedback_fn):
6008
    """Shutdown the instance.
6009

6010
    """
6011
    instance = self.instance
6012
    node_current = instance.primary_node
6013
    timeout = self.op.timeout
6014

    
6015
    if not self.op.no_remember:
6016
      self.cfg.MarkInstanceDown(instance.name)
6017

    
6018
    if self.primary_offline:
6019
      assert self.op.ignore_offline_nodes
6020
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6021
    else:
6022
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6023
      msg = result.fail_msg
6024
      if msg:
6025
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6026

    
6027
      _ShutdownInstanceDisks(self, instance)
6028

    
6029

    
6030
class LUInstanceReinstall(LogicalUnit):
6031
  """Reinstall an instance.
6032

6033
  """
6034
  HPATH = "instance-reinstall"
6035
  HTYPE = constants.HTYPE_INSTANCE
6036
  REQ_BGL = False
6037

    
6038
  def ExpandNames(self):
6039
    self._ExpandAndLockInstance()
6040

    
6041
  def BuildHooksEnv(self):
6042
    """Build hooks env.
6043

6044
    This runs on master, primary and secondary nodes of the instance.
6045

6046
    """
6047
    return _BuildInstanceHookEnvByObject(self, self.instance)
6048

    
6049
  def BuildHooksNodes(self):
6050
    """Build hooks nodes.
6051

6052
    """
6053
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6054
    return (nl, nl)
6055

    
6056
  def CheckPrereq(self):
6057
    """Check prerequisites.
6058

6059
    This checks that the instance is in the cluster and is not running.
6060

6061
    """
6062
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6063
    assert instance is not None, \
6064
      "Cannot retrieve locked instance %s" % self.op.instance_name
6065
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6066
                     " offline, cannot reinstall")
6067
    for node in instance.secondary_nodes:
6068
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6069
                       " cannot reinstall")
6070

    
6071
    if instance.disk_template == constants.DT_DISKLESS:
6072
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6073
                                 self.op.instance_name,
6074
                                 errors.ECODE_INVAL)
6075
    _CheckInstanceDown(self, instance, "cannot reinstall")
6076

    
6077
    if self.op.os_type is not None:
6078
      # OS verification
6079
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6080
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6081
      instance_os = self.op.os_type
6082
    else:
6083
      instance_os = instance.os
6084

    
6085
    nodelist = list(instance.all_nodes)
6086

    
6087
    if self.op.osparams:
6088
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6089
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6090
      self.os_inst = i_osdict # the new dict (without defaults)
6091
    else:
6092
      self.os_inst = None
6093

    
6094
    self.instance = instance
6095

    
6096
  def Exec(self, feedback_fn):
6097
    """Reinstall the instance.
6098

6099
    """
6100
    inst = self.instance
6101

    
6102
    if self.op.os_type is not None:
6103
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6104
      inst.os = self.op.os_type
6105
      # Write to configuration
6106
      self.cfg.Update(inst, feedback_fn)
6107

    
6108
    _StartInstanceDisks(self, inst, None)
6109
    try:
6110
      feedback_fn("Running the instance OS create scripts...")
6111
      # FIXME: pass debug option from opcode to backend
6112
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6113
                                             self.op.debug_level,
6114
                                             osparams=self.os_inst)
6115
      result.Raise("Could not install OS for instance %s on node %s" %
6116
                   (inst.name, inst.primary_node))
6117
    finally:
6118
      _ShutdownInstanceDisks(self, inst)
6119

    
6120

    
6121
class LUInstanceRecreateDisks(LogicalUnit):
6122
  """Recreate an instance's missing disks.
6123

6124
  """
6125
  HPATH = "instance-recreate-disks"
6126
  HTYPE = constants.HTYPE_INSTANCE
6127
  REQ_BGL = False
6128

    
6129
  def CheckArguments(self):
6130
    # normalise the disk list
6131
    self.op.disks = sorted(frozenset(self.op.disks))
6132

    
6133
  def ExpandNames(self):
6134
    self._ExpandAndLockInstance()
6135
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6136
    if self.op.nodes:
6137
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6138
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6139
    else:
6140
      self.needed_locks[locking.LEVEL_NODE] = []
6141

    
6142
  def DeclareLocks(self, level):
6143
    if level == locking.LEVEL_NODE:
6144
      # if we replace the nodes, we only need to lock the old primary,
6145
      # otherwise we need to lock all nodes for disk re-creation
6146
      primary_only = bool(self.op.nodes)
6147
      self._LockInstancesNodes(primary_only=primary_only)
6148

    
6149
  def BuildHooksEnv(self):
6150
    """Build hooks env.
6151

6152
    This runs on master, primary and secondary nodes of the instance.
6153

6154
    """
6155
    return _BuildInstanceHookEnvByObject(self, self.instance)
6156

    
6157
  def BuildHooksNodes(self):
6158
    """Build hooks nodes.
6159

6160
    """
6161
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6162
    return (nl, nl)
6163

    
6164
  def CheckPrereq(self):
6165
    """Check prerequisites.
6166

6167
    This checks that the instance is in the cluster and is not running.
6168

6169
    """
6170
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6171
    assert instance is not None, \
6172
      "Cannot retrieve locked instance %s" % self.op.instance_name
6173
    if self.op.nodes:
6174
      if len(self.op.nodes) != len(instance.all_nodes):
6175
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6176
                                   " %d replacement nodes were specified" %
6177
                                   (instance.name, len(instance.all_nodes),
6178
                                    len(self.op.nodes)),
6179
                                   errors.ECODE_INVAL)
6180
      assert instance.disk_template != constants.DT_DRBD8 or \
6181
          len(self.op.nodes) == 2
6182
      assert instance.disk_template != constants.DT_PLAIN or \
6183
          len(self.op.nodes) == 1
6184
      primary_node = self.op.nodes[0]
6185
    else:
6186
      primary_node = instance.primary_node
6187
    _CheckNodeOnline(self, primary_node)
6188

    
6189
    if instance.disk_template == constants.DT_DISKLESS:
6190
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6191
                                 self.op.instance_name, errors.ECODE_INVAL)
6192
    # if we replace nodes *and* the old primary is offline, we don't
6193
    # check
6194
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6195
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6196
    if not (self.op.nodes and old_pnode.offline):
6197
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6198

    
6199
    if not self.op.disks:
6200
      self.op.disks = range(len(instance.disks))
6201
    else:
6202
      for idx in self.op.disks:
6203
        if idx >= len(instance.disks):
6204
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6205
                                     errors.ECODE_INVAL)
6206
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6207
      raise errors.OpPrereqError("Can't recreate disks partially and"
6208
                                 " change the nodes at the same time",
6209
                                 errors.ECODE_INVAL)
6210
    self.instance = instance
6211

    
6212
  def Exec(self, feedback_fn):
6213
    """Recreate the disks.
6214

6215
    """
6216
    instance = self.instance
6217

    
6218
    to_skip = []
6219
    mods = [] # keeps track of needed logical_id changes
6220

    
6221
    for idx, disk in enumerate(instance.disks):
6222
      if idx not in self.op.disks: # disk idx has not been passed in
6223
        to_skip.append(idx)
6224
        continue
6225
      # update secondaries for disks, if needed
6226
      if self.op.nodes:
6227
        if disk.dev_type == constants.LD_DRBD8:
6228
          # need to update the nodes and minors
6229
          assert len(self.op.nodes) == 2
6230
          assert len(disk.logical_id) == 6 # otherwise disk internals
6231
                                           # have changed
6232
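          # the DRBD8 logical_id is a 6-tuple of
          # (nodeA, nodeB, port, minorA, minorB, secret); the port and
          # secret are kept, while new minors are allocated on the new nodes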
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6233
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6234
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6235
                    new_minors[0], new_minors[1], old_secret)
6236
          assert len(disk.logical_id) == len(new_id)
6237
          mods.append((idx, new_id))
6238

    
6239
    # now that we have passed all asserts above, we can apply the mods
6240
    # in a single run (to avoid partial changes)
6241
    for idx, new_id in mods:
6242
      instance.disks[idx].logical_id = new_id
6243

    
6244
    # change primary node, if needed
6245
    if self.op.nodes:
6246
      instance.primary_node = self.op.nodes[0]
6247
      self.LogWarning("Changing the instance's nodes, you will have to"
6248
                      " remove any disks left on the older nodes manually")
6249

    
6250
    if self.op.nodes:
6251
      self.cfg.Update(instance, feedback_fn)
6252

    
6253
    _CreateDisks(self, instance, to_skip=to_skip)
6254

    
6255

    
6256
class LUInstanceRename(LogicalUnit):
6257
  """Rename an instance.
6258

6259
  """
6260
  HPATH = "instance-rename"
6261
  HTYPE = constants.HTYPE_INSTANCE
6262

    
6263
  def CheckArguments(self):
6264
    """Check arguments.
6265

6266
    """
6267
    if self.op.ip_check and not self.op.name_check:
6268
      # TODO: make the ip check more flexible and not depend on the name check
6269
      raise errors.OpPrereqError("IP address check requires a name check",
6270
                                 errors.ECODE_INVAL)
6271

    
6272
  def BuildHooksEnv(self):
6273
    """Build hooks env.
6274

6275
    This runs on master, primary and secondary nodes of the instance.
6276

6277
    """
6278
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6279
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6280
    return env
6281

    
6282
  def BuildHooksNodes(self):
6283
    """Build hooks nodes.
6284

6285
    """
6286
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6287
    return (nl, nl)
6288

    
6289
  def CheckPrereq(self):
6290
    """Check prerequisites.
6291

6292
    This checks that the instance is in the cluster and is not running.
6293

6294
    """
6295
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6296
                                                self.op.instance_name)
6297
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6298
    assert instance is not None
6299
    _CheckNodeOnline(self, instance.primary_node)
6300
    _CheckInstanceDown(self, instance, "cannot rename")
6301
    self.instance = instance
6302

    
6303
    new_name = self.op.new_name
6304
    if self.op.name_check:
6305
      hostname = netutils.GetHostname(name=new_name)
6306
      if hostname != new_name:
6307
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6308
                     hostname.name)
6309
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6310
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6311
                                    " same as given hostname '%s'") %
6312
                                    (hostname.name, self.op.new_name),
6313
                                    errors.ECODE_INVAL)
6314
      new_name = self.op.new_name = hostname.name
6315
      if (self.op.ip_check and
6316
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6317
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6318
                                   (hostname.ip, new_name),
6319
                                   errors.ECODE_NOTUNIQUE)
6320

    
6321
    instance_list = self.cfg.GetInstanceList()
6322
    if new_name in instance_list and new_name != instance.name:
6323
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6324
                                 new_name, errors.ECODE_EXISTS)
6325

    
6326
  def Exec(self, feedback_fn):
6327
    """Rename the instance.
6328

6329
    """
6330
    inst = self.instance
6331
    old_name = inst.name
6332

    
6333
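    # for file-based disk templates the storage directory is embedded in
    # the disks' logical_id, so renaming the instance also requires
    # renaming that directory on the primary node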
    rename_file_storage = False
6334
    if (inst.disk_template in constants.DTS_FILEBASED and
6335
        self.op.new_name != inst.name):
6336
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6337
      rename_file_storage = True
6338

    
6339
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6340
    # Change the instance lock. This is definitely safe while we hold the BGL.
6341
    # Otherwise the new lock would have to be added in acquired mode.
6342
    assert self.REQ_BGL
6343
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6344
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6345

    
6346
    # re-read the instance from the configuration after rename
6347
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6348

    
6349
    if rename_file_storage:
6350
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6351
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6352
                                                     old_file_storage_dir,
6353
                                                     new_file_storage_dir)
6354
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6355
                   " (but the instance has been renamed in Ganeti)" %
6356
                   (inst.primary_node, old_file_storage_dir,
6357
                    new_file_storage_dir))
6358

    
6359
    _StartInstanceDisks(self, inst, None)
6360
    try:
6361
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6362
                                                 old_name, self.op.debug_level)
6363
      msg = result.fail_msg
6364
      if msg:
6365
        msg = ("Could not run OS rename script for instance %s on node %s"
6366
               " (but the instance has been renamed in Ganeti): %s" %
6367
               (inst.name, inst.primary_node, msg))
6368
        self.proc.LogWarning(msg)
6369
    finally:
6370
      _ShutdownInstanceDisks(self, inst)
6371

    
6372
    return inst.name
6373

    
6374

    
6375
class LUInstanceRemove(LogicalUnit):
6376
  """Remove an instance.
6377

6378
  """
6379
  HPATH = "instance-remove"
6380
  HTYPE = constants.HTYPE_INSTANCE
6381
  REQ_BGL = False
6382

    
6383
  def ExpandNames(self):
6384
    self._ExpandAndLockInstance()
6385
    self.needed_locks[locking.LEVEL_NODE] = []
6386
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6387

    
6388
  def DeclareLocks(self, level):
6389
    if level == locking.LEVEL_NODE:
6390
      self._LockInstancesNodes()
6391

    
6392
  def BuildHooksEnv(self):
6393
    """Build hooks env.
6394

6395
    This runs on master, primary and secondary nodes of the instance.
6396

6397
    """
6398
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6399
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6400
    return env
6401

    
6402
  def BuildHooksNodes(self):
6403
    """Build hooks nodes.
6404

6405
    """
6406
    nl = [self.cfg.GetMasterNode()]
6407
    nl_post = list(self.instance.all_nodes) + nl
6408
    return (nl, nl_post)
6409

    
6410
  def CheckPrereq(self):
6411
    """Check prerequisites.
6412

6413
    This checks that the instance is in the cluster.
6414

6415
    """
6416
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6417
    assert self.instance is not None, \
6418
      "Cannot retrieve locked instance %s" % self.op.instance_name
6419

    
6420
  def Exec(self, feedback_fn):
6421
    """Remove the instance.
6422

6423
    """
6424
    instance = self.instance
6425
    logging.info("Shutting down instance %s on node %s",
6426
                 instance.name, instance.primary_node)
6427

    
6428
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6429
                                             self.op.shutdown_timeout)
6430
    msg = result.fail_msg
6431
    if msg:
6432
      if self.op.ignore_failures:
6433
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6434
      else:
6435
        raise errors.OpExecError("Could not shutdown instance %s on"
6436
                                 " node %s: %s" %
6437
                                 (instance.name, instance.primary_node, msg))
6438

    
6439
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6440

    
6441

    
6442
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6443
  """Utility function to remove an instance.
6444

6445
  """
6446
  logging.info("Removing block devices for instance %s", instance.name)
6447

    
6448
  if not _RemoveDisks(lu, instance):
6449
    if not ignore_failures:
6450
      raise errors.OpExecError("Can't remove instance's disks")
6451
    feedback_fn("Warning: can't remove instance's disks")
6452

    
6453
  logging.info("Removing instance %s out of cluster config", instance.name)
6454

    
6455
  lu.cfg.RemoveInstance(instance.name)
6456

    
6457
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6458
    "Instance lock removal conflict"
6459

    
6460
  # Remove lock for the instance
6461
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6462

    
6463

    
6464
class LUInstanceQuery(NoHooksLU):
6465
  """Logical unit for querying instances.
6466

6467
  """
6468
  # pylint: disable-msg=W0142
6469
  REQ_BGL = False
6470

    
6471
  def CheckArguments(self):
6472
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6473
                             self.op.output_fields, self.op.use_locking)
6474

    
6475
  def ExpandNames(self):
6476
    self.iq.ExpandNames(self)
6477

    
6478
  def DeclareLocks(self, level):
6479
    self.iq.DeclareLocks(self, level)
6480

    
6481
  def Exec(self, feedback_fn):
6482
    return self.iq.OldStyleQuery(self)
6483

    
6484

    
6485
class LUInstanceFailover(LogicalUnit):
6486
  """Failover an instance.
6487

6488
  """
6489
  HPATH = "instance-failover"
6490
  HTYPE = constants.HTYPE_INSTANCE
6491
  REQ_BGL = False
6492

    
6493
  def CheckArguments(self):
6494
    """Check the arguments.
6495

6496
    """
6497
    self.iallocator = getattr(self.op, "iallocator", None)
6498
    self.target_node = getattr(self.op, "target_node", None)
6499

    
6500
  def ExpandNames(self):
6501
    self._ExpandAndLockInstance()
6502

    
6503
    if self.op.target_node is not None:
6504
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6505

    
6506
    self.needed_locks[locking.LEVEL_NODE] = []
6507
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6508

    
6509
    ignore_consistency = self.op.ignore_consistency
6510
    shutdown_timeout = self.op.shutdown_timeout
6511
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6512
                                       cleanup=False,
6513
                                       failover=True,
6514
                                       ignore_consistency=ignore_consistency,
6515
                                       shutdown_timeout=shutdown_timeout)
6516
    self.tasklets = [self._migrater]
6517

    
6518
  def DeclareLocks(self, level):
6519
    if level == locking.LEVEL_NODE:
6520
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6521
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6522
        if self.op.target_node is None:
6523
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6524
        else:
6525
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6526
                                                   self.op.target_node]
6527
        del self.recalculate_locks[locking.LEVEL_NODE]
6528
      else:
6529
        self._LockInstancesNodes()
6530

    
6531
  def BuildHooksEnv(self):
6532
    """Build hooks env.
6533

6534
    This runs on master, primary and secondary nodes of the instance.
6535

6536
    """
6537
    instance = self._migrater.instance
6538
    source_node = instance.primary_node
6539
    target_node = self.op.target_node
6540
    env = {
6541
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6542
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6543
      "OLD_PRIMARY": source_node,
6544
      "NEW_PRIMARY": target_node,
6545
      }
6546

    
6547
    if instance.disk_template in constants.DTS_INT_MIRROR:
6548
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6549
      env["NEW_SECONDARY"] = source_node
6550
    else:
6551
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6552

    
6553
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6554

    
6555
    return env
6556

    
6557
  def BuildHooksNodes(self):
6558
    """Build hooks nodes.
6559

6560
    """
6561
    instance = self._migrater.instance
6562
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6563
    return (nl, nl + [instance.primary_node])
6564

    
6565

    
6566
class LUInstanceMigrate(LogicalUnit):
6567
  """Migrate an instance.
6568

6569
  This is migration without shutting down, compared to the failover,
6570
  which is done with shutdown.
6571

6572
  """
6573
  HPATH = "instance-migrate"
6574
  HTYPE = constants.HTYPE_INSTANCE
6575
  REQ_BGL = False
6576

    
6577
  def ExpandNames(self):
6578
    self._ExpandAndLockInstance()
6579

    
6580
    if self.op.target_node is not None:
6581
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6582

    
6583
    self.needed_locks[locking.LEVEL_NODE] = []
6584
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6585

    
6586
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6587
                                       cleanup=self.op.cleanup,
6588
                                       failover=False,
6589
                                       fallback=self.op.allow_failover)
6590
    self.tasklets = [self._migrater]
6591

    
6592
  def DeclareLocks(self, level):
6593
    if level == locking.LEVEL_NODE:
6594
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6595
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6596
        if self.op.target_node is None:
6597
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6598
        else:
6599
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6600
                                                   self.op.target_node]
6601
        del self.recalculate_locks[locking.LEVEL_NODE]
6602
      else:
6603
        self._LockInstancesNodes()
6604

    
6605
  def BuildHooksEnv(self):
6606
    """Build hooks env.
6607

6608
    This runs on master, primary and secondary nodes of the instance.
6609

6610
    """
6611
    instance = self._migrater.instance
6612
    source_node = instance.primary_node
6613
    target_node = self.op.target_node
6614
    env = _BuildInstanceHookEnvByObject(self, instance)
6615
    env.update({
6616
      "MIGRATE_LIVE": self._migrater.live,
6617
      "MIGRATE_CLEANUP": self.op.cleanup,
6618
      "OLD_PRIMARY": source_node,
6619
      "NEW_PRIMARY": target_node,
6620
      })
6621

    
6622
    if instance.disk_template in constants.DTS_INT_MIRROR:
6623
      env["OLD_SECONDARY"] = target_node
6624
      env["NEW_SECONDARY"] = source_node
6625
    else:
6626
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6627

    
6628
    return env
6629

    
6630
  def BuildHooksNodes(self):
6631
    """Build hooks nodes.
6632

6633
    """
6634
    instance = self._migrater.instance
6635
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6636
    return (nl, nl + [instance.primary_node])
6637

    
6638

    
6639
class LUInstanceMove(LogicalUnit):
6640
  """Move an instance by data-copying.
6641

6642
  """
6643
  HPATH = "instance-move"
6644
  HTYPE = constants.HTYPE_INSTANCE
6645
  REQ_BGL = False
6646

    
6647
  def ExpandNames(self):
6648
    self._ExpandAndLockInstance()
6649
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6650
    self.op.target_node = target_node
6651
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6652
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6653

    
6654
  def DeclareLocks(self, level):
6655
    if level == locking.LEVEL_NODE:
6656
      self._LockInstancesNodes(primary_only=True)
6657

    
6658
  def BuildHooksEnv(self):
6659
    """Build hooks env.
6660

6661
    This runs on master, primary and secondary nodes of the instance.
6662

6663
    """
6664
    env = {
6665
      "TARGET_NODE": self.op.target_node,
6666
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6667
      }
6668
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6669
    return env
6670

    
6671
  def BuildHooksNodes(self):
6672
    """Build hooks nodes.
6673

6674
    """
6675
    nl = [
6676
      self.cfg.GetMasterNode(),
6677
      self.instance.primary_node,
6678
      self.op.target_node,
6679
      ]
6680
    return (nl, nl)
6681

    
6682
  def CheckPrereq(self):
6683
    """Check prerequisites.
6684

6685
    This checks that the instance is in the cluster.
6686

6687
    """
6688
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6689
    assert self.instance is not None, \
6690
      "Cannot retrieve locked instance %s" % self.op.instance_name
6691

    
6692
    node = self.cfg.GetNodeInfo(self.op.target_node)
6693
    assert node is not None, \
6694
      "Cannot retrieve locked node %s" % self.op.target_node
6695

    
6696
    self.target_node = target_node = node.name
6697

    
6698
    if target_node == instance.primary_node:
6699
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6700
                                 (instance.name, target_node),
6701
                                 errors.ECODE_STATE)
6702

    
6703
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6704

    
6705
    for idx, dsk in enumerate(instance.disks):
6706
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6707
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6708
                                   " cannot copy" % idx, errors.ECODE_STATE)
6709

    
6710
    _CheckNodeOnline(self, target_node)
6711
    _CheckNodeNotDrained(self, target_node)
6712
    _CheckNodeVmCapable(self, target_node)
6713

    
6714
    if instance.admin_up:
6715
      # check memory requirements on the target node
6716
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6717
                           instance.name, bep[constants.BE_MEMORY],
6718
                           instance.hypervisor)
6719
    else:
6720
      self.LogInfo("Not checking memory on the secondary node as"
6721
                   " instance will not be started")
6722

    
6723
    # check bridge existence
6724
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6725

    
6726
  def Exec(self, feedback_fn):
6727
    """Move an instance.
6728

6729
    The move is done by shutting it down on its present node, copying
6730
    the data over (slow) and starting it on the new node.
6731

6732
    """
6733
    instance = self.instance
6734

    
6735
    source_node = instance.primary_node
6736
    target_node = self.target_node
6737

    
6738
    self.LogInfo("Shutting down instance %s on source node %s",
6739
                 instance.name, source_node)
6740

    
6741
    result = self.rpc.call_instance_shutdown(source_node, instance,
6742
                                             self.op.shutdown_timeout)
6743
    msg = result.fail_msg
6744
    if msg:
6745
      if self.op.ignore_consistency:
6746
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6747
                             " Proceeding anyway. Please make sure node"
6748
                             " %s is down. Error details: %s",
6749
                             instance.name, source_node, source_node, msg)
6750
      else:
6751
        raise errors.OpExecError("Could not shutdown instance %s on"
6752
                                 " node %s: %s" %
6753
                                 (instance.name, source_node, msg))
6754

    
6755
    # create the target disks
6756
    try:
6757
      _CreateDisks(self, instance, target_node=target_node)
6758
    except errors.OpExecError:
6759
      self.LogWarning("Device creation failed, reverting...")
6760
      try:
6761
        _RemoveDisks(self, instance, target_node=target_node)
6762
      finally:
6763
        self.cfg.ReleaseDRBDMinors(instance.name)
6764
        raise
6765

    
6766
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6767

    
6768
    errs = []
6769
    # activate, get path, copy the data over
6770
    for idx, disk in enumerate(instance.disks):
6771
      self.LogInfo("Copying data for disk %d", idx)
6772
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6773
                                               instance.name, True, idx)
6774
      if result.fail_msg:
6775
        self.LogWarning("Can't assemble newly created disk %d: %s",
6776
                        idx, result.fail_msg)
6777
        errs.append(result.fail_msg)
6778
        break
6779
      dev_path = result.payload
6780
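      # stream the disk contents from the source node straight into the
      # newly assembled device on the target node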
      result = self.rpc.call_blockdev_export(source_node, disk,
6781
                                             target_node, dev_path,
6782
                                             cluster_name)
6783
      if result.fail_msg:
6784
        self.LogWarning("Can't copy data over for disk %d: %s",
6785
                        idx, result.fail_msg)
6786
        errs.append(result.fail_msg)
6787
        break
6788

    
6789
    if errs:
6790
      self.LogWarning("Some disks failed to copy, aborting")
6791
      try:
6792
        _RemoveDisks(self, instance, target_node=target_node)
6793
      finally:
6794
        self.cfg.ReleaseDRBDMinors(instance.name)
6795
        raise errors.OpExecError("Errors during disk copy: %s" %
6796
                                 (",".join(errs),))
6797

    
6798
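    # all disks were copied successfully; switch the instance to the new
    # primary node in the configuration before removing the old disks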
    instance.primary_node = target_node
6799
    self.cfg.Update(instance, feedback_fn)
6800

    
6801
    self.LogInfo("Removing the disks on the original node")
6802
    _RemoveDisks(self, instance, target_node=source_node)
6803

    
6804
    # Only start the instance if it's marked as up
6805
    if instance.admin_up:
6806
      self.LogInfo("Starting instance %s on node %s",
6807
                   instance.name, target_node)
6808

    
6809
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6810
                                           ignore_secondaries=True)
6811
      if not disks_ok:
6812
        _ShutdownInstanceDisks(self, instance)
6813
        raise errors.OpExecError("Can't activate the instance's disks")
6814

    
6815
      result = self.rpc.call_instance_start(target_node, instance,
6816
                                            None, None, False)
6817
      msg = result.fail_msg
6818
      if msg:
6819
        _ShutdownInstanceDisks(self, instance)
6820
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6821
                                 (instance.name, target_node, msg))
6822

    
6823

    
6824
class LUNodeMigrate(LogicalUnit):
6825
  """Migrate all instances from a node.
6826

6827
  """
6828
  HPATH = "node-migrate"
6829
  HTYPE = constants.HTYPE_NODE
6830
  REQ_BGL = False
6831

    
6832
  def CheckArguments(self):
6833
    pass
6834

    
6835
  def ExpandNames(self):
6836
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6837

    
6838
    self.share_locks = _ShareAll()
6839
    self.needed_locks = {
6840
      locking.LEVEL_NODE: [self.op.node_name],
6841
      }
6842

    
6843
  def BuildHooksEnv(self):
6844
    """Build hooks env.
6845

6846
    This runs on the master, the primary and all the secondaries.
6847

6848
    """
6849
    return {
6850
      "NODE_NAME": self.op.node_name,
6851
      }
6852

    
6853
  def BuildHooksNodes(self):
6854
    """Build hooks nodes.
6855

6856
    """
6857
    nl = [self.cfg.GetMasterNode()]
6858
    return (nl, nl)
6859

    
6860
  def CheckPrereq(self):
6861
    pass
6862

    
6863
  def Exec(self, feedback_fn):
6864
    # Prepare jobs for migrating instances
6865
    jobs = [
6866
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
6867
                                 mode=self.op.mode,
6868
                                 live=self.op.live,
6869
                                 iallocator=self.op.iallocator,
6870
                                 target_node=self.op.target_node)]
6871
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6872
      ]
6873

    
6874
    # TODO: Run iallocator in this opcode and pass correct placement options to
6875
    # OpInstanceMigrate. Since other jobs can modify the cluster between
6876
    # running the iallocator and the actual migration, a good consistency model
6877
    # will have to be found.
6878

    
6879
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
6880
            frozenset([self.op.node_name]))
6881

    
6882
    return ResultWithJobs(jobs)
6883

    
6884

    
6885
class TLMigrateInstance(Tasklet):
6886
  """Tasklet class for instance migration.
6887

6888
  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we clean up from a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout to use for the
      shutdown
6907

6908
  """
6909
  def __init__(self, lu, instance_name, cleanup=False,
6910
               failover=False, fallback=False,
6911
               ignore_consistency=False,
6912
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6913
    """Initializes this class.
6914

6915
    """
6916
    Tasklet.__init__(self, lu)
6917

    
6918
    # Parameters
6919
    self.instance_name = instance_name
6920
    self.cleanup = cleanup
6921
    self.live = False # will be overridden later
6922
    self.failover = failover
6923
    self.fallback = fallback
6924
    self.ignore_consistency = ignore_consistency
6925
    self.shutdown_timeout = shutdown_timeout
6926

    
6927
  def CheckPrereq(self):
6928
    """Check prerequisites.
6929

6930
    This checks that the instance is in the cluster.
6931

6932
    """
6933
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6934
    instance = self.cfg.GetInstanceInfo(instance_name)
6935
    assert instance is not None
6936
    self.instance = instance
6937

    
6938
    if (not self.cleanup and not instance.admin_up and not self.failover and
6939
        self.fallback):
6940
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6941
                      " to failover")
6942
      self.failover = True
6943

    
6944
    if instance.disk_template not in constants.DTS_MIRRORED:
6945
      if self.failover:
6946
        text = "failovers"
6947
      else:
6948
        text = "migrations"
6949
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6950
                                 " %s" % (instance.disk_template, text),
6951
                                 errors.ECODE_STATE)
6952

    
6953
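    # externally mirrored disk templates may move to an arbitrary node,
    # chosen directly or through an iallocator; internally mirrored ones
    # (e.g. DRBD) can only go to their current secondary node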
    if instance.disk_template in constants.DTS_EXT_MIRROR:
6954
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6955

    
6956
      if self.lu.op.iallocator:
6957
        self._RunAllocator()
6958
      else:
6959
        # We set self.target_node as it is required by
        # BuildHooksEnv
6961
        self.target_node = self.lu.op.target_node
6962

    
6963
      # self.target_node is already populated, either directly or by the
6964
      # iallocator run
6965
      target_node = self.target_node
6966
      if self.target_node == instance.primary_node:
6967
        raise errors.OpPrereqError("Cannot migrate instance %s"
6968
                                   " to its primary (%s)" %
6969
                                   (instance.name, instance.primary_node))
6970

    
6971
      if len(self.lu.tasklets) == 1:
6972
        # It is safe to release locks only when we're the only tasklet
6973
        # in the LU
6974
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6975
                      keep=[instance.primary_node, self.target_node])
6976

    
6977
    else:
6978
      secondary_nodes = instance.secondary_nodes
6979
      if not secondary_nodes:
6980
        raise errors.ConfigurationError("No secondary node but using"
6981
                                        " %s disk template" %
6982
                                        instance.disk_template)
6983
      target_node = secondary_nodes[0]
6984
      if self.lu.op.iallocator or (self.lu.op.target_node and
6985
                                   self.lu.op.target_node != target_node):
6986
        if self.failover:
6987
          text = "failed over"
6988
        else:
6989
          text = "migrated"
6990
        raise errors.OpPrereqError("Instances with disk template %s cannot"
6991
                                   " be %s to arbitrary nodes"
6992
                                   " (neither an iallocator nor a target"
6993
                                   " node can be passed)" %
6994
                                   (instance.disk_template, text),
6995
                                   errors.ECODE_INVAL)
6996

    
6997
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6998

    
6999
    # check memory requirements on the secondary node
7000
    if not self.failover or instance.admin_up:
7001
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7002
                           instance.name, i_be[constants.BE_MEMORY],
7003
                           instance.hypervisor)
7004
    else:
7005
      self.lu.LogInfo("Not checking memory on the secondary node as"
7006
                      " instance will not be started")
7007

    
7008
    # check bridge existence
7009
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7010

    
7011
    if not self.cleanup:
7012
      _CheckNodeNotDrained(self.lu, target_node)
7013
      if not self.failover:
7014
        result = self.rpc.call_instance_migratable(instance.primary_node,
7015
                                                   instance)
7016
        if result.fail_msg and self.fallback:
7017
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7018
                          " failover")
7019
          self.failover = True
7020
        else:
7021
          result.Raise("Can't migrate, please use failover",
7022
                       prereq=True, ecode=errors.ECODE_STATE)
7023

    
7024
    assert not (self.failover and self.cleanup)
7025

    
7026
    if not self.failover:
7027
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7028
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7029
                                   " parameters are accepted",
7030
                                   errors.ECODE_INVAL)
7031
      if self.lu.op.live is not None:
7032
        if self.lu.op.live:
7033
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7034
        else:
7035
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7036
        # reset the 'live' parameter to None so that repeated
7037
        # invocations of CheckPrereq do not raise an exception
7038
        self.lu.op.live = None
7039
      elif self.lu.op.mode is None:
7040
        # read the default value from the hypervisor
7041
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7042
                                                skip_globals=False)
7043
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7044

    
7045
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7046
    else:
7047
      # Failover is never live
7048
      self.live = False
7049

    
7050
  def _RunAllocator(self):
7051
    """Run the allocator based on input opcode.
7052

7053
    """
7054
    ial = IAllocator(self.cfg, self.rpc,
7055
                     mode=constants.IALLOCATOR_MODE_RELOC,
7056
                     name=self.instance_name,
7057
                     # TODO See why hail breaks with a single node below
7058
                     relocate_from=[self.instance.primary_node,
7059
                                    self.instance.primary_node],
7060
                     )
7061

    
7062
    ial.Run(self.lu.op.iallocator)
7063

    
7064
    if not ial.success:
7065
      raise errors.OpPrereqError("Can't compute nodes using"
7066
                                 " iallocator '%s': %s" %
7067
                                 (self.lu.op.iallocator, ial.info),
7068
                                 errors.ECODE_NORES)
7069
    if len(ial.result) != ial.required_nodes:
7070
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7071
                                 " of nodes (%s), required %s" %
7072
                                 (self.lu.op.iallocator, len(ial.result),
7073
                                  ial.required_nodes), errors.ECODE_FAULT)
7074
    self.target_node = ial.result[0]
7075
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7076
                    self.instance_name, self.lu.op.iallocator,
7077
                    utils.CommaJoin(ial.result))
7078

    
7079
  def _WaitUntilSync(self):
7080
    """Poll with custom rpc for disk sync.
7081

7082
    This uses our own step-based rpc call.
7083

7084
    """
7085
    self.feedback_fn("* wait until resync is done")
7086
    all_done = False
7087
    while not all_done:
7088
      all_done = True
7089
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7090
                                            self.nodes_ip,
7091
                                            self.instance.disks)
7092
      min_percent = 100
7093
      for node, nres in result.items():
7094
        nres.Raise("Cannot resync disks on node %s" % node)
7095
        node_done, node_percent = nres.payload
7096
        all_done = all_done and node_done
7097
        if node_percent is not None:
7098
          min_percent = min(min_percent, node_percent)
7099
      if not all_done:
7100
        if min_percent < 100:
7101
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7102
        time.sleep(2)
7103

    
7104
  def _EnsureSecondary(self, node):
7105
    """Demote a node to secondary.
7106

7107
    """
7108
    self.feedback_fn("* switching node %s to secondary mode" % node)
7109

    
7110
    for dev in self.instance.disks:
7111
      self.cfg.SetDiskID(dev, node)
7112

    
7113
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7114
                                          self.instance.disks)
7115
    result.Raise("Cannot change disk to secondary on node %s" % node)
7116

    
7117
  def _GoStandalone(self):
7118
    """Disconnect from the network.
7119

7120
    """
7121
    self.feedback_fn("* changing into standalone mode")
7122
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7123
                                               self.instance.disks)
7124
    for node, nres in result.items():
7125
      nres.Raise("Cannot disconnect disks node %s" % node)
7126

    
7127
  def _GoReconnect(self, multimaster):
7128
    """Reconnect to the network.
7129

7130
    """
7131
    if multimaster:
7132
      msg = "dual-master"
7133
    else:
7134
      msg = "single-master"
7135
    self.feedback_fn("* changing disks into %s mode" % msg)
7136
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7137
                                           self.instance.disks,
7138
                                           self.instance.name, multimaster)
7139
    for node, nres in result.items():
7140
      nres.Raise("Cannot change disks config on node %s" % node)
7141

    
7142
  def _ExecCleanup(self):
7143
    """Try to cleanup after a failed migration.
7144

7145
    The cleanup is done by:
7146
      - check that the instance is running only on one node
7147
        (and update the config if needed)
7148
      - change disks on its secondary node to secondary
7149
      - wait until disks are fully synchronized
7150
      - disconnect from the network
7151
      - change disks into single-master mode
7152
      - wait again until disks are fully synchronized
7153

7154
    """
7155
    instance = self.instance
7156
    target_node = self.target_node
7157
    source_node = self.source_node
7158

    
7159
    # check running on only one node
7160
    self.feedback_fn("* checking where the instance actually runs"
7161
                     " (if this hangs, the hypervisor might be in"
7162
                     " a bad state)")
7163
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7164
    for node, result in ins_l.items():
7165
      result.Raise("Can't contact node %s" % node)
7166

    
7167
    runningon_source = instance.name in ins_l[source_node].payload
7168
    runningon_target = instance.name in ins_l[target_node].payload
7169

    
7170
    if runningon_source and runningon_target:
7171
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7172
                               " or the hypervisor is confused; you will have"
7173
                               " to ensure manually that it runs only on one"
7174
                               " and restart this operation")
7175

    
7176
    if not (runningon_source or runningon_target):
7177
      raise errors.OpExecError("Instance does not seem to be running at all;"
7178
                               " in this case it's safer to repair by"
7179
                               " running 'gnt-instance stop' to ensure disk"
7180
                               " shutdown, and then restarting it")
7181

    
7182
    if runningon_target:
7183
      # the migration has actually succeeded, we need to update the config
7184
      self.feedback_fn("* instance running on secondary node (%s),"
7185
                       " updating config" % target_node)
7186
      instance.primary_node = target_node
7187
      self.cfg.Update(instance, self.feedback_fn)
7188
      demoted_node = source_node
7189
    else:
7190
      self.feedback_fn("* instance confirmed to be running on its"
7191
                       " primary node (%s)" % source_node)
7192
      demoted_node = target_node
7193

    
7194
    if instance.disk_template in constants.DTS_INT_MIRROR:
7195
      self._EnsureSecondary(demoted_node)
7196
      try:
7197
        self._WaitUntilSync()
7198
      except errors.OpExecError:
7199
        # we ignore errors here, since if the device is standalone, it
7200
        # won't be able to sync
7201
        pass
7202
      self._GoStandalone()
7203
      self._GoReconnect(False)
7204
      self._WaitUntilSync()
7205

    
7206
    self.feedback_fn("* done")
7207

    
7208
  def _RevertDiskStatus(self):
7209
    """Try to revert the disk status after a failed migration.
7210

7211
    """
7212
    target_node = self.target_node
7213
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7214
      return
7215

    
7216
    try:
7217
      self._EnsureSecondary(target_node)
7218
      self._GoStandalone()
7219
      self._GoReconnect(False)
7220
      self._WaitUntilSync()
7221
    except errors.OpExecError, err:
7222
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7223
                         " please try to recover the instance manually;"
7224
                         " error '%s'" % str(err))
7225

    
7226
  def _AbortMigration(self):
7227
    """Call the hypervisor code to abort a started migration.
7228

7229
    """
7230
    instance = self.instance
7231
    target_node = self.target_node
7232
    migration_info = self.migration_info
7233

    
7234
    abort_result = self.rpc.call_finalize_migration(target_node,
7235
                                                    instance,
7236
                                                    migration_info,
7237
                                                    False)
7238
    abort_msg = abort_result.fail_msg
7239
    if abort_msg:
7240
      logging.error("Aborting migration failed on target node %s: %s",
7241
                    target_node, abort_msg)
7242
      # Don't raise an exception here, as we still have to try to revert the
7243
      # disk status, even if this step failed.
7244

    
7245
  def _ExecMigration(self):
7246
    """Migrate an instance.
7247

7248
    The migration is done by:
7249
      - change the disks into dual-master mode
7250
      - wait until disks are fully synchronized again
7251
      - migrate the instance
7252
      - change disks on the new secondary node (the old primary) to secondary
7253
      - wait until disks are fully synchronized
7254
      - change disks into single-master mode
7255

7256
    """
7257
    instance = self.instance
7258
    target_node = self.target_node
7259
    source_node = self.source_node
7260

    
7261
    self.feedback_fn("* checking disk consistency between source and target")
7262
    for dev in instance.disks:
7263
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7264
        raise errors.OpExecError("Disk %s is degraded or not fully"
7265
                                 " synchronized on target node,"
7266
                                 " aborting migration" % dev.iv_name)
7267

    
7268
    # First get the migration information from the remote node
7269
    result = self.rpc.call_migration_info(source_node, instance)
7270
    msg = result.fail_msg
7271
    if msg:
7272
      log_err = ("Failed fetching source migration information from %s: %s" %
7273
                 (source_node, msg))
7274
      logging.error(log_err)
7275
      raise errors.OpExecError(log_err)
7276

    
7277
    self.migration_info = migration_info = result.payload
7278

    
7279
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7280
      # Then switch the disks to master/master mode
7281
      self._EnsureSecondary(target_node)
7282
      self._GoStandalone()
7283
      self._GoReconnect(True)
7284
      self._WaitUntilSync()
7285

    
7286
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7287
    result = self.rpc.call_accept_instance(target_node,
7288
                                           instance,
7289
                                           migration_info,
7290
                                           self.nodes_ip[target_node])
7291

    
7292
    msg = result.fail_msg
7293
    if msg:
7294
      logging.error("Instance pre-migration failed, trying to revert"
7295
                    " disk status: %s", msg)
7296
      self.feedback_fn("Pre-migration failed, aborting")
7297
      self._AbortMigration()
7298
      self._RevertDiskStatus()
7299
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7300
                               (instance.name, msg))
7301

    
7302
    self.feedback_fn("* migrating instance to %s" % target_node)
7303
    result = self.rpc.call_instance_migrate(source_node, instance,
7304
                                            self.nodes_ip[target_node],
7305
                                            self.live)
7306
    msg = result.fail_msg
7307
    if msg:
7308
      logging.error("Instance migration failed, trying to revert"
7309
                    " disk status: %s", msg)
7310
      self.feedback_fn("Migration failed, aborting")
7311
      self._AbortMigration()
7312
      self._RevertDiskStatus()
7313
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7314
                               (instance.name, msg))
7315

    
7316
    instance.primary_node = target_node
7317
    # distribute new instance config to the other nodes
7318
    self.cfg.Update(instance, self.feedback_fn)
7319

    
7320
    result = self.rpc.call_finalize_migration(target_node,
7321
                                              instance,
7322
                                              migration_info,
7323
                                              True)
7324
    msg = result.fail_msg
7325
    if msg:
7326
      logging.error("Instance migration succeeded, but finalization failed:"
7327
                    " %s", msg)
7328
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7329
                               msg)
7330

    
7331
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7332
      self._EnsureSecondary(source_node)
7333
      self._WaitUntilSync()
7334
      self._GoStandalone()
7335
      self._GoReconnect(False)
7336
      self._WaitUntilSync()
7337

    
7338
    self.feedback_fn("* done")
7339

    
7340
  def _ExecFailover(self):
7341
    """Failover an instance.
7342

7343
    The failover is done by shutting it down on its present node and
7344
    starting it on the secondary.
7345

7346
    """
7347
    instance = self.instance
7348
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7349

    
7350
    source_node = instance.primary_node
7351
    target_node = self.target_node
7352

    
7353
    if instance.admin_up:
7354
      self.feedback_fn("* checking disk consistency between source and target")
7355
      for dev in instance.disks:
7356
        # for drbd, these are drbd over lvm
7357
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7358
          if primary_node.offline:
7359
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7360
                             " target node %s" %
7361
                             (primary_node.name, dev.iv_name, target_node))
7362
          elif not self.ignore_consistency:
7363
            raise errors.OpExecError("Disk %s is degraded on target node,"
7364
                                     " aborting failover" % dev.iv_name)
7365
    else:
7366
      self.feedback_fn("* not checking disk consistency as instance is not"
7367
                       " running")
7368

    
7369
    self.feedback_fn("* shutting down instance on source node")
7370
    logging.info("Shutting down instance %s on node %s",
7371
                 instance.name, source_node)
7372

    
7373
    result = self.rpc.call_instance_shutdown(source_node, instance,
7374
                                             self.shutdown_timeout)
7375
    msg = result.fail_msg
7376
    if msg:
7377
      if self.ignore_consistency or primary_node.offline:
7378
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7379
                           " proceeding anyway; please make sure node"
7380
                           " %s is down; error details: %s",
7381
                           instance.name, source_node, source_node, msg)
7382
      else:
7383
        raise errors.OpExecError("Could not shutdown instance %s on"
7384
                                 " node %s: %s" %
7385
                                 (instance.name, source_node, msg))
7386

    
7387
    self.feedback_fn("* deactivating the instance's disks on source node")
7388
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7389
      raise errors.OpExecError("Can't shut down the instance's disks")
7390

    
7391
    instance.primary_node = target_node
7392
    # distribute new instance config to the other nodes
7393
    self.cfg.Update(instance, self.feedback_fn)
7394

    
7395
    # Only start the instance if it's marked as up
7396
    if instance.admin_up:
7397
      self.feedback_fn("* activating the instance's disks on target node %s" %
7398
                       target_node)
7399
      logging.info("Starting instance %s on node %s",
7400
                   instance.name, target_node)
7401

    
7402
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7403
                                           ignore_secondaries=True)
7404
      if not disks_ok:
7405
        _ShutdownInstanceDisks(self.lu, instance)
7406
        raise errors.OpExecError("Can't activate the instance's disks")
7407

    
7408
      self.feedback_fn("* starting the instance on the target node %s" %
7409
                       target_node)
7410
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7411
                                            False)
7412
      msg = result.fail_msg
7413
      if msg:
7414
        _ShutdownInstanceDisks(self.lu, instance)
7415
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7416
                                 (instance.name, target_node, msg))
7417

    
7418
  def Exec(self, feedback_fn):
7419
    """Perform the migration.
7420

7421
    """
7422
    self.feedback_fn = feedback_fn
7423
    self.source_node = self.instance.primary_node
7424

    
7425
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7426
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7427
      self.target_node = self.instance.secondary_nodes[0]
7428
      # Otherwise self.target_node has been populated either
7429
      # directly, or through an iallocator.
7430

    
7431
    self.all_nodes = [self.source_node, self.target_node]
7432
    self.nodes_ip = {
7433
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7434
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7435
      }
7436

    
7437
    if self.failover:
7438
      feedback_fn("Failover instance %s" % self.instance.name)
7439
      self._ExecFailover()
7440
    else:
7441
      feedback_fn("Migrating instance %s" % self.instance.name)
7442

    
7443
      if self.cleanup:
7444
        return self._ExecCleanup()
7445
      else:
7446
        return self._ExecMigration()
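# Illustrative summary of the Exec() flow above (not part of the original
# module; it only restates the code):
#   failover=True            -> _ExecFailover(): shut down on the source node,
#                               update primary_node, start on the target if
#                               the instance is marked up
#   failover=False + cleanup -> _ExecCleanup(): reconcile the config and DRBD
#                               roles after a previously failed migration
#   failover=False           -> _ExecMigration(): dual-master, (non-)live
#                               migrate, then demote the old primary back to
#                               single-master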
7447

    
7448

    
7449
def _CreateBlockDev(lu, node, instance, device, force_create,
7450
                    info, force_open):
7451
  """Create a tree of block devices on a given node.
7452

7453
  If this device type has to be created on secondaries, create it and
7454
  all its children.
7455

7456
  If not, just recurse to children keeping the same 'force' value.
7457

7458
  @param lu: the lu on whose behalf we execute
7459
  @param node: the node on which to create the device
7460
  @type instance: L{objects.Instance}
7461
  @param instance: the instance which owns the device
7462
  @type device: L{objects.Disk}
7463
  @param device: the device to create
7464
  @type force_create: boolean
7465
  @param force_create: whether to force creation of this device; this
7466
      will be changed to True whenever we find a device which has
7467
      the CreateOnSecondary() attribute set
7468
  @param info: the extra 'metadata' we should attach to the device
7469
      (this will be represented as a LVM tag)
7470
  @type force_open: boolean
7471
  @param force_open: this parameter will be passed to the
7472
      L{backend.BlockdevCreate} function where it specifies
7473
      whether we run on primary or not, and it affects both
7474
      the child assembly and the device's own Open() execution
7475

7476
  """
7477
  if device.CreateOnSecondary():
7478
    force_create = True
7479

    
7480
  if device.children:
7481
    for child in device.children:
7482
      _CreateBlockDev(lu, node, instance, child, force_create,
7483
                      info, force_open)
7484

    
7485
  if not force_create:
7486
    return
7487

    
7488
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
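# Illustrative walk-through (hypothetical node name; a sketch, not part of the
# original module): for a drbd8 disk invoked on the secondary with
#   _CreateBlockDev(lu, "node2.example.com", instance, drbd_disk,
#                   force_create=False, info=info, force_open=False)
# CreateOnSecondary() is true for the DRBD device, so force_create flips to
# True, the two LV children are created first by the recursion above, and only
# then is the DRBD device itself created via _CreateSingleBlockDev.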
7489

    
7490

    
7491
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7492
  """Create a single block device on a given node.
7493

7494
  This will not recurse over children of the device, so they must be
7495
  created in advance.
7496

7497
  @param lu: the lu on whose behalf we execute
7498
  @param node: the node on which to create the device
7499
  @type instance: L{objects.Instance}
7500
  @param instance: the instance which owns the device
7501
  @type device: L{objects.Disk}
7502
  @param device: the device to create
7503
  @param info: the extra 'metadata' we should attach to the device
7504
      (this will be represented as a LVM tag)
7505
  @type force_open: boolean
7506
  @param force_open: this parameter will be passed to the
7507
      L{backend.BlockdevCreate} function where it specifies
7508
      whether we run on primary or not, and it affects both
7509
      the child assembly and the device's own Open() execution
7510

7511
  """
7512
  lu.cfg.SetDiskID(device, node)
7513
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7514
                                       instance.name, force_open, info)
7515
  result.Raise("Can't create block device %s on"
7516
               " node %s for instance %s" % (device, node, instance.name))
7517
  if device.physical_id is None:
7518
    device.physical_id = result.payload
7519

    
7520

    
7521
def _GenerateUniqueNames(lu, exts):
7522
  """Generate a suitable LV name.
7523

7524
  This will generate a logical volume name for the given instance.
7525

7526
  """
7527
  results = []
7528
  for val in exts:
7529
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7530
    results.append("%s%s" % (new_id, val))
7531
  return results
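# Illustrative sketch (hypothetical values, not part of the original module):
# as used by the plain template below, a call such as
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# returns something like ["<uuid-1>.disk0", "<uuid-2>.disk1"], i.e. one
# freshly generated unique ID per requested extension.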
7532

    
7533

    
7534
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7535
                         iv_name, p_minor, s_minor):
7536
  """Generate a drbd8 device complete with its children.
7537

7538
  """
7539
  assert len(vgnames) == len(names) == 2
7540
  port = lu.cfg.AllocatePort()
7541
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7542
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7543
                          logical_id=(vgnames[0], names[0]))
7544
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7545
                          logical_id=(vgnames[1], names[1]))
7546
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7547
                          logical_id=(primary, secondary, port,
7548
                                      p_minor, s_minor,
7549
                                      shared_secret),
7550
                          children=[dev_data, dev_meta],
7551
                          iv_name=iv_name)
7552
  return drbd_dev
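# Illustrative sketch of the returned object tree (a summary of the code
# above, not part of the original module):
#   drbd_dev: LD_DRBD8, size=size,
#             logical_id=(primary, secondary, port, p_minor, s_minor, secret)
#     children[0]: LD_LV, size=size, logical_id=(vgnames[0], names[0])  # data
#     children[1]: LD_LV, size=128,  logical_id=(vgnames[1], names[1])  # meta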
7553

    
7554

    
7555
def _GenerateDiskTemplate(lu, template_name,
7556
                          instance_name, primary_node,
7557
                          secondary_nodes, disk_info,
7558
                          file_storage_dir, file_driver,
7559
                          base_index, feedback_fn):
7560
  """Generate the entire disk layout for a given template type.
7561

7562
  """
7563
  # TODO: compute space requirements
7564

    
7565
  vgname = lu.cfg.GetVGName()
7566
  disk_count = len(disk_info)
7567
  disks = []
7568
  if template_name == constants.DT_DISKLESS:
7569
    pass
7570
  elif template_name == constants.DT_PLAIN:
7571
    if len(secondary_nodes) != 0:
7572
      raise errors.ProgrammerError("Wrong template configuration")
7573

    
7574
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7575
                                      for i in range(disk_count)])
7576
    for idx, disk in enumerate(disk_info):
7577
      disk_index = idx + base_index
7578
      vg = disk.get(constants.IDISK_VG, vgname)
7579
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7580
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7581
                              size=disk[constants.IDISK_SIZE],
7582
                              logical_id=(vg, names[idx]),
7583
                              iv_name="disk/%d" % disk_index,
7584
                              mode=disk[constants.IDISK_MODE])
7585
      disks.append(disk_dev)
7586
  elif template_name == constants.DT_DRBD8:
7587
    if len(secondary_nodes) != 1:
7588
      raise errors.ProgrammerError("Wrong template configuration")
7589
    remote_node = secondary_nodes[0]
7590
    minors = lu.cfg.AllocateDRBDMinor(
7591
      [primary_node, remote_node] * len(disk_info), instance_name)
7592

    
7593
    names = []
7594
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7595
                                               for i in range(disk_count)]):
7596
      names.append(lv_prefix + "_data")
7597
      names.append(lv_prefix + "_meta")
7598
    for idx, disk in enumerate(disk_info):
7599
      disk_index = idx + base_index
7600
      data_vg = disk.get(constants.IDISK_VG, vgname)
7601
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7602
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7603
                                      disk[constants.IDISK_SIZE],
7604
                                      [data_vg, meta_vg],
7605
                                      names[idx * 2:idx * 2 + 2],
7606
                                      "disk/%d" % disk_index,
7607
                                      minors[idx * 2], minors[idx * 2 + 1])
7608
      disk_dev.mode = disk[constants.IDISK_MODE]
7609
      disks.append(disk_dev)
7610
  elif template_name == constants.DT_FILE:
7611
    if len(secondary_nodes) != 0:
7612
      raise errors.ProgrammerError("Wrong template configuration")
7613

    
7614
    opcodes.RequireFileStorage()
7615

    
7616
    for idx, disk in enumerate(disk_info):
7617
      disk_index = idx + base_index
7618
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7619
                              size=disk[constants.IDISK_SIZE],
7620
                              iv_name="disk/%d" % disk_index,
7621
                              logical_id=(file_driver,
7622
                                          "%s/disk%d" % (file_storage_dir,
7623
                                                         disk_index)),
7624
                              mode=disk[constants.IDISK_MODE])
7625
      disks.append(disk_dev)
7626
  elif template_name == constants.DT_SHARED_FILE:
7627
    if len(secondary_nodes) != 0:
7628
      raise errors.ProgrammerError("Wrong template configuration")
7629

    
7630
    opcodes.RequireSharedFileStorage()
7631

    
7632
    for idx, disk in enumerate(disk_info):
7633
      disk_index = idx + base_index
7634
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7635
                              size=disk[constants.IDISK_SIZE],
7636
                              iv_name="disk/%d" % disk_index,
7637
                              logical_id=(file_driver,
7638
                                          "%s/disk%d" % (file_storage_dir,
7639
                                                         disk_index)),
7640
                              mode=disk[constants.IDISK_MODE])
7641
      disks.append(disk_dev)
7642
  elif template_name == constants.DT_BLOCK:
7643
    if len(secondary_nodes) != 0:
7644
      raise errors.ProgrammerError("Wrong template configuration")
7645

    
7646
    for idx, disk in enumerate(disk_info):
7647
      disk_index = idx + base_index
7648
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7649
                              size=disk[constants.IDISK_SIZE],
7650
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7651
                                          disk[constants.IDISK_ADOPT]),
7652
                              iv_name="disk/%d" % disk_index,
7653
                              mode=disk[constants.IDISK_MODE])
7654
      disks.append(disk_dev)
7655

    
7656
  else:
7657
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7658
  return disks
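# Illustrative example (assumed input, not part of the original module): for
#   template_name = constants.DT_PLAIN
#   disk_info = [{constants.IDISK_SIZE: 1024, constants.IDISK_MODE: "rw"}]
# the function returns a single LD_LV disk with iv_name "disk/0", mode "rw"
# and logical_id (<cluster default VG>, "<unique id>.disk0").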
7659

    
7660

    
7661
def _GetInstanceInfoText(instance):
7662
  """Compute that text that should be added to the disk's metadata.
7663

7664
  """
7665
  return "originstname+%s" % instance.name
7666

    
7667

    
7668
def _CalcEta(time_taken, written, total_size):
7669
  """Calculates the ETA based on size written and total size.
7670

7671
  @param time_taken: The time taken so far
7672
  @param written: amount written so far
7673
  @param total_size: The total size of data to be written
7674
  @return: The remaining time in seconds
7675

7676
  """
7677
  avg_time = time_taken / float(written)
7678
  return (total_size - written) * avg_time
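# Worked example (illustrative only): if 512 MiB out of 2048 MiB were written
# in 30 seconds, the average is 30/512 seconds per MiB, so
#   _CalcEta(30, 512, 2048) == (2048 - 512) * (30 / 512.0) == 90.0
# i.e. roughly a minute and a half remaining.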
7679

    
7680

    
7681
def _WipeDisks(lu, instance):
7682
  """Wipes instance disks.
7683

7684
  @type lu: L{LogicalUnit}
7685
  @param lu: the logical unit on whose behalf we execute
7686
  @type instance: L{objects.Instance}
7687
  @param instance: the instance whose disks we should create
7688
  @return: the success of the wipe
7689

7690
  """
7691
  node = instance.primary_node
7692

    
7693
  for device in instance.disks:
7694
    lu.cfg.SetDiskID(device, node)
7695

    
7696
  logging.info("Pause sync of instance %s disks", instance.name)
7697
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7698

    
7699
  for idx, success in enumerate(result.payload):
7700
    if not success:
7701
      logging.warn("pause-sync of instance %s for disks %d failed",
7702
                   instance.name, idx)
7703

    
7704
  try:
7705
    for idx, device in enumerate(instance.disks):
7706
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7707
      # MAX_WIPE_CHUNK at max
7708
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7709
                            constants.MIN_WIPE_CHUNK_PERCENT)
7710
      # we _must_ make this an int, otherwise rounding errors will
7711
      # occur
7712
      wipe_chunk_size = int(wipe_chunk_size)
7713

    
7714
      lu.LogInfo("* Wiping disk %d", idx)
7715
      logging.info("Wiping disk %d for instance %s, node %s using"
7716
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7717

    
7718
      offset = 0
7719
      size = device.size
7720
      last_output = 0
7721
      start_time = time.time()
7722

    
7723
      while offset < size:
7724
        wipe_size = min(wipe_chunk_size, size - offset)
7725
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7726
                      idx, offset, wipe_size)
7727
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7728
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7729
                     (idx, offset, wipe_size))
7730
        now = time.time()
7731
        offset += wipe_size
7732
        if now - last_output >= 60:
7733
          eta = _CalcEta(now - start_time, offset, size)
7734
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7735
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7736
          last_output = now
7737
  finally:
7738
    logging.info("Resume sync of instance %s disks", instance.name)
7739

    
7740
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7741

    
7742
    for idx, success in enumerate(result.payload):
7743
      if not success:
7744
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7745
                      " look at the status and troubleshoot the issue", idx)
7746
        logging.warn("resume-sync of instance %s for disks %d failed",
7747
                     instance.name, idx)
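# Illustrative sketch of the chunk-size rule used above (constant values
# assumed, e.g. MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024 MiB):
#   a 4096 MiB disk  -> int(min(1024, 4096 / 100.0 * 10))  = 409 MiB chunks
#   a 20480 MiB disk -> int(min(1024, 20480 / 100.0 * 10)) = 1024 MiB chunks
# so small disks are wiped in roughly ten chunks while large disks are capped
# at the maximum chunk size per call_blockdev_wipe round trip.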
7748

    
7749

    
7750
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7751
  """Create all disks for an instance.
7752

7753
  This abstracts away some work from AddInstance.
7754

7755
  @type lu: L{LogicalUnit}
7756
  @param lu: the logical unit on whose behalf we execute
7757
  @type instance: L{objects.Instance}
7758
  @param instance: the instance whose disks we should create
7759
  @type to_skip: list
7760
  @param to_skip: list of indices to skip
7761
  @type target_node: string
7762
  @param target_node: if passed, overrides the target node for creation
7763
  @rtype: boolean
7764
  @return: the success of the creation
7765

7766
  """
7767
  info = _GetInstanceInfoText(instance)
7768
  if target_node is None:
7769
    pnode = instance.primary_node
7770
    all_nodes = instance.all_nodes
7771
  else:
7772
    pnode = target_node
7773
    all_nodes = [pnode]
7774

    
7775
  if instance.disk_template in constants.DTS_FILEBASED:
7776
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7777
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7778

    
7779
    result.Raise("Failed to create directory '%s' on"
7780
                 " node %s" % (file_storage_dir, pnode))
7781

    
7782
  # Note: this needs to be kept in sync with adding of disks in
7783
  # LUInstanceSetParams
7784
  for idx, device in enumerate(instance.disks):
7785
    if to_skip and idx in to_skip:
7786
      continue
7787
    logging.info("Creating volume %s for instance %s",
7788
                 device.iv_name, instance.name)
7789
    #HARDCODE
7790
    for node in all_nodes:
7791
      f_create = node == pnode
7792
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
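# Illustrative note (hypothetical node names, not part of the original
# module): for a drbd8 instance with primary "node1" and secondary "node2",
# each disk is created on both nodes, but f_create (and therefore both
# force_create and force_open) is True only on the primary:
#   _CreateBlockDev(lu, "node1", instance, disk, True, info, True)
#   _CreateBlockDev(lu, "node2", instance, disk, False, info, False)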
7793

    
7794

    
7795
def _RemoveDisks(lu, instance, target_node=None):
7796
  """Remove all disks for an instance.
7797

7798
  This abstracts away some work from `AddInstance()` and
7799
  `RemoveInstance()`. Note that in case some of the devices couldn't
7800
  be removed, the removal will continue with the other ones (compare
7801
  with `_CreateDisks()`).
7802

7803
  @type lu: L{LogicalUnit}
7804
  @param lu: the logical unit on whose behalf we execute
7805
  @type instance: L{objects.Instance}
7806
  @param instance: the instance whose disks we should remove
7807
  @type target_node: string
7808
  @param target_node: used to override the node on which to remove the disks
7809
  @rtype: boolean
7810
  @return: the success of the removal
7811

7812
  """
7813
  logging.info("Removing block devices for instance %s", instance.name)
7814

    
7815
  all_result = True
7816
  for device in instance.disks:
7817
    if target_node:
7818
      edata = [(target_node, device)]
7819
    else:
7820
      edata = device.ComputeNodeTree(instance.primary_node)
7821
    for node, disk in edata:
7822
      lu.cfg.SetDiskID(disk, node)
7823
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7824
      if msg:
7825
        lu.LogWarning("Could not remove block device %s on node %s,"
7826
                      " continuing anyway: %s", device.iv_name, node, msg)
7827
        all_result = False
7828

    
7829
  if instance.disk_template == constants.DT_FILE:
7830
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7831
    if target_node:
7832
      tgt = target_node
7833
    else:
7834
      tgt = instance.primary_node
7835
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7836
    if result.fail_msg:
7837
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7838
                    file_storage_dir, instance.primary_node, result.fail_msg)
7839
      all_result = False
7840

    
7841
  return all_result
7842

    
7843

    
7844
def _ComputeDiskSizePerVG(disk_template, disks):
7845
  """Compute disk size requirements in the volume group
7846

7847
  """
7848
  def _compute(disks, payload):
7849
    """Universal algorithm.
7850

7851
    """
7852
    vgs = {}
7853
    for disk in disks:
7854
      vgs[disk[constants.IDISK_VG]] = \
7855
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload
7856

    
7857
    return vgs
7858

    
7859
  # Required free disk space as a function of disk and swap space
7860
  req_size_dict = {
7861
    constants.DT_DISKLESS: {},
7862
    constants.DT_PLAIN: _compute(disks, 0),
7863
    # 128 MB are added for drbd metadata for each disk
7864
    constants.DT_DRBD8: _compute(disks, 128),
7865
    constants.DT_FILE: {},
7866
    constants.DT_SHARED_FILE: {},
7867
  }
7868

    
7869
  if disk_template not in req_size_dict:
7870
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7871
                                 " is unknown" %  disk_template)
7872

    
7873
  return req_size_dict[disk_template]
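# Illustrative example (assumed disks, not part of the original module): with
#   disks = [{constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
#            {constants.IDISK_VG: "fastvg", constants.IDISK_SIZE: 2048}]
# the drbd8 template requires {"xenvg": 1024 + 128, "fastvg": 2048 + 128},
# i.e. the 128 MiB of DRBD metadata is accounted per disk in its own VG, while
# DT_PLAIN drops that overhead and the file-based templates need no VG space.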
7874

    
7875

    
7876
def _ComputeDiskSize(disk_template, disks):
7877
  """Compute disk size requirements in the volume group
7878

7879
  """
7880
  # Required free disk space as a function of disk and swap space
7881
  req_size_dict = {
7882
    constants.DT_DISKLESS: None,
7883
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7884
    # 128 MB are added for drbd metadata for each disk
7885
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7886
    constants.DT_FILE: None,
7887
    constants.DT_SHARED_FILE: 0,
7888
    constants.DT_BLOCK: 0,
7889
  }
7890

    
7891
  if disk_template not in req_size_dict:
7892
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7893
                                 " is unknown" %  disk_template)
7894

    
7895
  return req_size_dict[disk_template]
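# Worked example (assumed sizes, not part of the original module): for
#   disks = [{constants.IDISK_SIZE: 1024}, {constants.IDISK_SIZE: 2048}]
# the totals are
#   _ComputeDiskSize(constants.DT_PLAIN, disks) == 3072
#   _ComputeDiskSize(constants.DT_DRBD8, disks) == 3072 + 2 * 128 == 3328
# while diskless/file report None and shared-file/block report 0.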
7896

    
7897

    
7898
def _FilterVmNodes(lu, nodenames):
7899
  """Filters out non-vm_capable nodes from a list.
7900

7901
  @type lu: L{LogicalUnit}
7902
  @param lu: the logical unit for which we check
7903
  @type nodenames: list
7904
  @param nodenames: the list of nodes on which we should check
7905
  @rtype: list
7906
  @return: the list of vm-capable nodes
7907

7908
  """
7909
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7910
  return [name for name in nodenames if name not in non_vm_nodes]
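# Illustrative example (hypothetical node names): if the cluster lists
# "node3.example.com" as non-vm_capable, then
#   _FilterVmNodes(lu, ["node1.example.com", "node2.example.com",
#                       "node3.example.com"])
# returns ["node1.example.com", "node2.example.com"].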
7911

    
7912

    
7913
def _CheckHVParams(lu, nodenames, hvname, hvparams):
7914
  """Hypervisor parameter validation.
7915

7916
  This function abstracts the hypervisor parameter validation to be
7917
  used in both instance create and instance modify.
7918

7919
  @type lu: L{LogicalUnit}
7920
  @param lu: the logical unit for which we check
7921
  @type nodenames: list
7922
  @param nodenames: the list of nodes on which we should check
7923
  @type hvname: string
7924
  @param hvname: the name of the hypervisor we should use
7925
  @type hvparams: dict
7926
  @param hvparams: the parameters which we need to check
7927
  @raise errors.OpPrereqError: if the parameters are not valid
7928

7929
  """
7930
  nodenames = _FilterVmNodes(lu, nodenames)
7931
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7932
                                                  hvname,
7933
                                                  hvparams)
7934
  for node in nodenames:
7935
    info = hvinfo[node]
7936
    if info.offline:
7937
      continue
7938
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
7939

    
7940

    
7941
def _CheckOSParams(lu, required, nodenames, osname, osparams):
7942
  """OS parameters validation.
7943

7944
  @type lu: L{LogicalUnit}
7945
  @param lu: the logical unit for which we check
7946
  @type required: boolean
7947
  @param required: whether the validation should fail if the OS is not
7948
      found
7949
  @type nodenames: list
7950
  @param nodenames: the list of nodes on which we should check
7951
  @type osname: string
7952
  @param osname: the name of the hypervisor we should use
7953
  @type osparams: dict
7954
  @param osparams: the parameters which we need to check
7955
  @raise errors.OpPrereqError: if the parameters are not valid
7956

7957
  """
7958
  nodenames = _FilterVmNodes(lu, nodenames)
7959
  result = lu.rpc.call_os_validate(required, nodenames, osname,
7960
                                   [constants.OS_VALIDATE_PARAMETERS],
7961
                                   osparams)
7962
  for node, nres in result.items():
7963
    # we don't check for offline cases since this should be run only
7964
    # against the master node and/or an instance's nodes
7965
    nres.Raise("OS Parameters validation failed on node %s" % node)
7966
    if not nres.payload:
7967
      lu.LogInfo("OS %s not found on node %s, validation skipped",
7968
                 osname, node)
7969

    
7970

    
7971
class LUInstanceCreate(LogicalUnit):
7972
  """Create an instance.
7973

7974
  """
7975
  HPATH = "instance-add"
7976
  HTYPE = constants.HTYPE_INSTANCE
7977
  REQ_BGL = False
7978

    
7979
  def CheckArguments(self):
7980
    """Check arguments.
7981

7982
    """
7983
    # do not require name_check to ease forward/backward compatibility
7984
    # for tools
7985
    if self.op.no_install and self.op.start:
7986
      self.LogInfo("No-installation mode selected, disabling startup")
7987
      self.op.start = False
7988
    # validate/normalize the instance name
7989
    self.op.instance_name = \
7990
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7991

    
7992
    if self.op.ip_check and not self.op.name_check:
7993
      # TODO: make the ip check more flexible and not depend on the name check
7994
      raise errors.OpPrereqError("Cannot do IP address check without a name"
7995
                                 " check", errors.ECODE_INVAL)
7996

    
7997
    # check nics' parameter names
7998
    for nic in self.op.nics:
7999
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8000

    
8001
    # check disks. parameter names and consistent adopt/no-adopt strategy
8002
    has_adopt = has_no_adopt = False
8003
    for disk in self.op.disks:
8004
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8005
      if constants.IDISK_ADOPT in disk:
8006
        has_adopt = True
8007
      else:
8008
        has_no_adopt = True
8009
    if has_adopt and has_no_adopt:
8010
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8011
                                 errors.ECODE_INVAL)
8012
    if has_adopt:
8013
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8014
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8015
                                   " '%s' disk template" %
8016
                                   self.op.disk_template,
8017
                                   errors.ECODE_INVAL)
8018
      if self.op.iallocator is not None:
8019
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8020
                                   " iallocator script", errors.ECODE_INVAL)
8021
      if self.op.mode == constants.INSTANCE_IMPORT:
8022
        raise errors.OpPrereqError("Disk adoption not allowed for"
8023
                                   " instance import", errors.ECODE_INVAL)
8024
    else:
8025
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8026
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8027
                                   " but no 'adopt' parameter given" %
8028
                                   self.op.disk_template,
8029
                                   errors.ECODE_INVAL)
8030

    
8031
    self.adopt_disks = has_adopt
8032

    
8033
    # instance name verification
8034
    if self.op.name_check:
8035
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8036
      self.op.instance_name = self.hostname1.name
8037
      # used in CheckPrereq for ip ping check
8038
      self.check_ip = self.hostname1.ip
8039
    else:
8040
      self.check_ip = None
8041

    
8042
    # file storage checks
8043
    if (self.op.file_driver and
8044
        self.op.file_driver not in constants.FILE_DRIVER):
8045
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8046
                                 self.op.file_driver, errors.ECODE_INVAL)
8047

    
8048
    if self.op.disk_template == constants.DT_FILE:
8049
      opcodes.RequireFileStorage()
8050
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8051
      opcodes.RequireSharedFileStorage()
8052

    
8053
    ### Node/iallocator related checks
8054
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8055

    
8056
    if self.op.pnode is not None:
8057
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8058
        if self.op.snode is None:
8059
          raise errors.OpPrereqError("The networked disk templates need"
8060
                                     " a mirror node", errors.ECODE_INVAL)
8061
      elif self.op.snode:
8062
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8063
                        " template")
8064
        self.op.snode = None
8065

    
8066
    self._cds = _GetClusterDomainSecret()
8067

    
8068
    if self.op.mode == constants.INSTANCE_IMPORT:
8069
      # On import force_variant must be True, because if we forced it at
8070
      # initial install, our only chance when importing it back is that it
8071
      # works again!
8072
      self.op.force_variant = True
8073

    
8074
      if self.op.no_install:
8075
        self.LogInfo("No-installation mode has no effect during import")
8076

    
8077
    elif self.op.mode == constants.INSTANCE_CREATE:
8078
      if self.op.os_type is None:
8079
        raise errors.OpPrereqError("No guest OS specified",
8080
                                   errors.ECODE_INVAL)
8081
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8082
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8083
                                   " installation" % self.op.os_type,
8084
                                   errors.ECODE_STATE)
8085
      if self.op.disk_template is None:
8086
        raise errors.OpPrereqError("No disk template specified",
8087
                                   errors.ECODE_INVAL)
8088

    
8089
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8090
      # Check handshake to ensure both clusters have the same domain secret
8091
      src_handshake = self.op.source_handshake
8092
      if not src_handshake:
8093
        raise errors.OpPrereqError("Missing source handshake",
8094
                                   errors.ECODE_INVAL)
8095

    
8096
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8097
                                                           src_handshake)
8098
      if errmsg:
8099
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8100
                                   errors.ECODE_INVAL)
8101

    
8102
      # Load and check source CA
8103
      self.source_x509_ca_pem = self.op.source_x509_ca
8104
      if not self.source_x509_ca_pem:
8105
        raise errors.OpPrereqError("Missing source X509 CA",
8106
                                   errors.ECODE_INVAL)
8107

    
8108
      try:
8109
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8110
                                                    self._cds)
8111
      except OpenSSL.crypto.Error, err:
8112
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8113
                                   (err, ), errors.ECODE_INVAL)
8114

    
8115
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8116
      if errcode is not None:
8117
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8118
                                   errors.ECODE_INVAL)
8119

    
8120
      self.source_x509_ca = cert
8121

    
8122
      src_instance_name = self.op.source_instance_name
8123
      if not src_instance_name:
8124
        raise errors.OpPrereqError("Missing source instance name",
8125
                                   errors.ECODE_INVAL)
8126

    
8127
      self.source_instance_name = \
8128
          netutils.GetHostname(name=src_instance_name).name
8129

    
8130
    else:
8131
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8132
                                 self.op.mode, errors.ECODE_INVAL)
8133

    
8134
  def ExpandNames(self):
8135
    """ExpandNames for CreateInstance.
8136

8137
    Figure out the right locks for instance creation.
8138

8139
    """
8140
    self.needed_locks = {}
8141

    
8142
    instance_name = self.op.instance_name
8143
    # this is just a preventive check, but someone might still add this
8144
    # instance in the meantime, and creation will fail at lock-add time
8145
    if instance_name in self.cfg.GetInstanceList():
8146
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8147
                                 instance_name, errors.ECODE_EXISTS)
8148

    
8149
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8150

    
8151
    if self.op.iallocator:
8152
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8153
    else:
8154
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8155
      nodelist = [self.op.pnode]
8156
      if self.op.snode is not None:
8157
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8158
        nodelist.append(self.op.snode)
8159
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8160

    
8161
    # in case of import lock the source node too
8162
    if self.op.mode == constants.INSTANCE_IMPORT:
8163
      src_node = self.op.src_node
8164
      src_path = self.op.src_path
8165

    
8166
      if src_path is None:
8167
        self.op.src_path = src_path = self.op.instance_name
8168

    
8169
      if src_node is None:
8170
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8171
        self.op.src_node = None
8172
        if os.path.isabs(src_path):
8173
          raise errors.OpPrereqError("Importing an instance from an absolute"
8174
                                     " path requires a source node option",
8175
                                     errors.ECODE_INVAL)
8176
      else:
8177
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8178
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8179
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8180
        if not os.path.isabs(src_path):
8181
          self.op.src_path = src_path = \
8182
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8183

    
8184
  def _RunAllocator(self):
8185
    """Run the allocator based on input opcode.
8186

8187
    """
8188
    nics = [n.ToDict() for n in self.nics]
8189
    ial = IAllocator(self.cfg, self.rpc,
8190
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8191
                     name=self.op.instance_name,
8192
                     disk_template=self.op.disk_template,
8193
                     tags=self.op.tags,
8194
                     os=self.op.os_type,
8195
                     vcpus=self.be_full[constants.BE_VCPUS],
8196
                     memory=self.be_full[constants.BE_MEMORY],
8197
                     disks=self.disks,
8198
                     nics=nics,
8199
                     hypervisor=self.op.hypervisor,
8200
                     )
8201

    
8202
    ial.Run(self.op.iallocator)
8203

    
8204
    if not ial.success:
8205
      raise errors.OpPrereqError("Can't compute nodes using"
8206
                                 " iallocator '%s': %s" %
8207
                                 (self.op.iallocator, ial.info),
8208
                                 errors.ECODE_NORES)
8209
    if len(ial.result) != ial.required_nodes:
8210
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8211
                                 " of nodes (%s), required %s" %
8212
                                 (self.op.iallocator, len(ial.result),
8213
                                  ial.required_nodes), errors.ECODE_FAULT)
8214
    self.op.pnode = ial.result[0]
8215
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8216
                 self.op.instance_name, self.op.iallocator,
8217
                 utils.CommaJoin(ial.result))
8218
    if ial.required_nodes == 2:
8219
      self.op.snode = ial.result[1]
8220

    
8221
  def BuildHooksEnv(self):
8222
    """Build hooks env.
8223

8224
    This runs on master, primary and secondary nodes of the instance.
8225

8226
    """
8227
    env = {
8228
      "ADD_MODE": self.op.mode,
8229
      }
8230
    if self.op.mode == constants.INSTANCE_IMPORT:
8231
      env["SRC_NODE"] = self.op.src_node
8232
      env["SRC_PATH"] = self.op.src_path
8233
      env["SRC_IMAGES"] = self.src_images
8234

    
8235
    env.update(_BuildInstanceHookEnv(
8236
      name=self.op.instance_name,
8237
      primary_node=self.op.pnode,
8238
      secondary_nodes=self.secondaries,
8239
      status=self.op.start,
8240
      os_type=self.op.os_type,
8241
      memory=self.be_full[constants.BE_MEMORY],
8242
      vcpus=self.be_full[constants.BE_VCPUS],
8243
      nics=_NICListToTuple(self, self.nics),
8244
      disk_template=self.op.disk_template,
8245
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8246
             for d in self.disks],
8247
      bep=self.be_full,
8248
      hvp=self.hv_full,
8249
      hypervisor_name=self.op.hypervisor,
8250
      tags=self.op.tags,
8251
    ))
8252

    
8253
    return env
8254

    
8255
  def BuildHooksNodes(self):
8256
    """Build hooks nodes.
8257

8258
    """
8259
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8260
    return nl, nl
8261

    
8262
  def _ReadExportInfo(self):
8263
    """Reads the export information from disk.
8264

8265
    It will override the opcode source node and path with the actual
8266
    information, if these two were not specified before.
8267

8268
    @return: the export information
8269

8270
    """
8271
    assert self.op.mode == constants.INSTANCE_IMPORT
8272

    
8273
    src_node = self.op.src_node
8274
    src_path = self.op.src_path
8275

    
8276
    if src_node is None:
8277
      locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8278
      exp_list = self.rpc.call_export_list(locked_nodes)
8279
      found = False
8280
      for node in exp_list:
8281
        if exp_list[node].fail_msg:
8282
          continue
8283
        if src_path in exp_list[node].payload:
8284
          found = True
8285
          self.op.src_node = src_node = node
8286
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8287
                                                       src_path)
8288
          break
8289
      if not found:
8290
        raise errors.OpPrereqError("No export found for relative path %s" %
8291
                                    src_path, errors.ECODE_INVAL)
8292

    
8293
    _CheckNodeOnline(self, src_node)
8294
    result = self.rpc.call_export_info(src_node, src_path)
8295
    result.Raise("No export or invalid export found in dir %s" % src_path)
8296

    
8297
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8298
    if not export_info.has_section(constants.INISECT_EXP):
8299
      raise errors.ProgrammerError("Corrupted export config",
8300
                                   errors.ECODE_ENVIRON)
8301

    
8302
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8303
    if (int(ei_version) != constants.EXPORT_VERSION):
8304
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8305
                                 (ei_version, constants.EXPORT_VERSION),
8306
                                 errors.ECODE_ENVIRON)
8307
    return export_info
8308

    
8309
  def _ReadExportParams(self, einfo):
8310
    """Use export parameters as defaults.
8311

8312
    In case the opcode doesn't specify (as in override) some instance
8313
    parameters, then try to use them from the export information, if
8314
    that declares them.
8315

8316
    """
8317
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8318

    
8319
    if self.op.disk_template is None:
8320
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8321
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8322
                                          "disk_template")
8323
      else:
8324
        raise errors.OpPrereqError("No disk template specified and the export"
8325
                                   " is missing the disk_template information",
8326
                                   errors.ECODE_INVAL)
8327

    
8328
    if not self.op.disks:
8329
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
8330
        disks = []
8331
        # TODO: import the disk iv_name too
8332
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8333
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8334
          disks.append({constants.IDISK_SIZE: disk_sz})
8335
        self.op.disks = disks
8336
      else:
8337
        raise errors.OpPrereqError("No disk info specified and the export"
8338
                                   " is missing the disk information",
8339
                                   errors.ECODE_INVAL)
8340

    
8341
    if (not self.op.nics and
8342
        einfo.has_option(constants.INISECT_INS, "nic_count")):
8343
      nics = []
8344
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8345
        ndict = {}
8346
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8347
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8348
          ndict[name] = v
8349
        nics.append(ndict)
8350
      self.op.nics = nics
8351

    
8352
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8353
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8354

    
8355
    if (self.op.hypervisor is None and
8356
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8357
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8358

    
8359
    if einfo.has_section(constants.INISECT_HYP):
8360
      # use the export parameters but do not override the ones
8361
      # specified by the user
8362
      for name, value in einfo.items(constants.INISECT_HYP):
8363
        if name not in self.op.hvparams:
8364
          self.op.hvparams[name] = value
8365

    
8366
    if einfo.has_section(constants.INISECT_BEP):
8367
      # use the parameters, without overriding
8368
      for name, value in einfo.items(constants.INISECT_BEP):
8369
        if name not in self.op.beparams:
8370
          self.op.beparams[name] = value
8371
    else:
8372
      # try to read the parameters old style, from the main section
8373
      for name in constants.BES_PARAMETERS:
8374
        if (name not in self.op.beparams and
8375
            einfo.has_option(constants.INISECT_INS, name)):
8376
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8377

    
8378
    if einfo.has_section(constants.INISECT_OSP):
8379
      # use the parameters, without overriding
8380
      for name, value in einfo.items(constants.INISECT_OSP):
8381
        if name not in self.op.osparams:
8382
          self.op.osparams[name] = value
8383

    
8384
  def _RevertToDefaults(self, cluster):
8385
    """Revert the instance parameters to the default values.
8386

8387
    """
8388
    # hvparams
8389
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8390
    for name in self.op.hvparams.keys():
8391
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8392
        del self.op.hvparams[name]
8393
    # beparams
8394
    be_defs = cluster.SimpleFillBE({})
8395
    for name in self.op.beparams.keys():
8396
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8397
        del self.op.beparams[name]
8398
    # nic params
8399
    nic_defs = cluster.SimpleFillNIC({})
8400
    for nic in self.op.nics:
8401
      for name in constants.NICS_PARAMETERS:
8402
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8403
          del nic[name]
8404
    # osparams
8405
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8406
    for name in self.op.osparams.keys():
8407
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8408
        del self.op.osparams[name]
8409

    
8410
  def _CalculateFileStorageDir(self):
8411
    """Calculate final instance file storage dir.
8412

8413
    """
8414
    # file storage dir calculation/check
8415
    self.instance_file_storage_dir = None
8416
    if self.op.disk_template in constants.DTS_FILEBASED:
8417
      # build the full file storage dir path
8418
      joinargs = []
8419

    
8420
      if self.op.disk_template == constants.DT_SHARED_FILE:
8421
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8422
      else:
8423
        get_fsd_fn = self.cfg.GetFileStorageDir
8424

    
8425
      cfg_storagedir = get_fsd_fn()
8426
      if not cfg_storagedir:
8427
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8428
      joinargs.append(cfg_storagedir)
8429

    
8430
      if self.op.file_storage_dir is not None:
8431
        joinargs.append(self.op.file_storage_dir)
8432

    
8433
      joinargs.append(self.op.instance_name)
8434

    
8435
      # pylint: disable-msg=W0142
8436
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8437

    
8438
  def CheckPrereq(self):
8439
    """Check prerequisites.
8440

8441
    """
8442
    self._CalculateFileStorageDir()
8443

    
8444
    if self.op.mode == constants.INSTANCE_IMPORT:
8445
      export_info = self._ReadExportInfo()
8446
      self._ReadExportParams(export_info)
8447

    
8448
    if (not self.cfg.GetVGName() and
8449
        self.op.disk_template not in constants.DTS_NOT_LVM):
8450
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8451
                                 " instances", errors.ECODE_STATE)
8452

    
8453
    if self.op.hypervisor is None:
8454
      self.op.hypervisor = self.cfg.GetHypervisorType()
8455

    
8456
    cluster = self.cfg.GetClusterInfo()
8457
    enabled_hvs = cluster.enabled_hypervisors
8458
    if self.op.hypervisor not in enabled_hvs:
8459
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8460
                                 " cluster (%s)" % (self.op.hypervisor,
8461
                                  ",".join(enabled_hvs)),
8462
                                 errors.ECODE_STATE)
8463

    
8464
    # Check tag validity
8465
    for tag in self.op.tags:
8466
      objects.TaggableObject.ValidateTag(tag)
8467

    
8468
    # check hypervisor parameter syntax (locally)
8469
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8470
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8471
                                      self.op.hvparams)
8472
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8473
    hv_type.CheckParameterSyntax(filled_hvp)
8474
    self.hv_full = filled_hvp
8475
    # check that we don't specify global parameters on an instance
8476
    _CheckGlobalHvParams(self.op.hvparams)
8477

    
8478
    # fill and remember the beparams dict
8479
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8480
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8481

    
8482
    # build os parameters
8483
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8484

    
8485
    # now that hvp/bep are in final format, let's reset to defaults,
8486
    # if told to do so
8487
    if self.op.identify_defaults:
8488
      self._RevertToDefaults(cluster)
8489

    
8490
    # NIC buildup
8491
    self.nics = []
8492
    for idx, nic in enumerate(self.op.nics):
8493
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8494
      nic_mode = nic_mode_req
8495
      if nic_mode is None:
8496
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8497

    
8498
      # in routed mode, for the first nic, the default ip is 'auto'
8499
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8500
        default_ip_mode = constants.VALUE_AUTO
8501
      else:
8502
        default_ip_mode = constants.VALUE_NONE
8503

    
8504
      # ip validity checks
8505
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8506
      if ip is None or ip.lower() == constants.VALUE_NONE:
8507
        nic_ip = None
8508
      elif ip.lower() == constants.VALUE_AUTO:
8509
        if not self.op.name_check:
8510
          raise errors.OpPrereqError("IP address set to auto but name checks"
8511
                                     " have been skipped",
8512
                                     errors.ECODE_INVAL)
8513
        nic_ip = self.hostname1.ip
8514
      else:
8515
        if not netutils.IPAddress.IsValid(ip):
8516
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8517
                                     errors.ECODE_INVAL)
8518
        nic_ip = ip
8519

    
8520
      # TODO: check the ip address for uniqueness
8521
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8522
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8523
                                   errors.ECODE_INVAL)
8524

    
8525
      # MAC address verification
8526
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8527
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8528
        mac = utils.NormalizeAndValidateMac(mac)
8529

    
8530
        try:
8531
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8532
        except errors.ReservationError:
8533
          raise errors.OpPrereqError("MAC address %s already in use"
8534
                                     " in cluster" % mac,
8535
                                     errors.ECODE_NOTUNIQUE)
8536

    
8537
      #  Build nic parameters
8538
      link = nic.get(constants.INIC_LINK, None)
8539
      nicparams = {}
8540
      if nic_mode_req:
8541
        nicparams[constants.NIC_MODE] = nic_mode_req
8542
      if link:
8543
        nicparams[constants.NIC_LINK] = link
8544

    
8545
      check_params = cluster.SimpleFillNIC(nicparams)
8546
      objects.NIC.CheckParameterSyntax(check_params)
8547
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8548

    
8549
    # disk checks/pre-build
8550
    default_vg = self.cfg.GetVGName()
8551
    self.disks = []
8552
    for disk in self.op.disks:
8553
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8554
      if mode not in constants.DISK_ACCESS_SET:
8555
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8556
                                   mode, errors.ECODE_INVAL)
8557
      size = disk.get(constants.IDISK_SIZE, None)
8558
      if size is None:
8559
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8560
      try:
8561
        size = int(size)
8562
      except (TypeError, ValueError):
8563
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8564
                                   errors.ECODE_INVAL)
8565

    
8566
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8567
      new_disk = {
8568
        constants.IDISK_SIZE: size,
8569
        constants.IDISK_MODE: mode,
8570
        constants.IDISK_VG: data_vg,
8571
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8572
        }
8573
      if constants.IDISK_ADOPT in disk:
8574
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8575
      self.disks.append(new_disk)
8576

    
8577
    if self.op.mode == constants.INSTANCE_IMPORT:
8578

    
8579
      # Check that the new instance doesn't have less disks than the export
8580
      instance_disks = len(self.disks)
8581
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8582
      if instance_disks < export_disks:
8583
        raise errors.OpPrereqError("Not enough disks to import."
8584
                                   " (instance: %d, export: %d)" %
8585
                                   (instance_disks, export_disks),
8586
                                   errors.ECODE_INVAL)
8587

    
8588
      disk_images = []
8589
      for idx in range(export_disks):
8590
        option = "disk%d_dump" % idx
8591
        if export_info.has_option(constants.INISECT_INS, option):
8592
          # FIXME: are the old os-es, disk sizes, etc. useful?
8593
          export_name = export_info.get(constants.INISECT_INS, option)
8594
          image = utils.PathJoin(self.op.src_path, export_name)
8595
          disk_images.append(image)
8596
        else:
8597
          disk_images.append(False)
8598

    
8599
      self.src_images = disk_images
8600

    
8601
      old_name = export_info.get(constants.INISECT_INS, "name")
8602
      try:
8603
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8604
      except (TypeError, ValueError), err:
8605
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8606
                                   " an integer: %s" % str(err),
8607
                                   errors.ECODE_STATE)
8608
      if self.op.instance_name == old_name:
8609
        for idx, nic in enumerate(self.nics):
8610
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8611
            nic_mac_ini = "nic%d_mac" % idx
8612
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8613

    
8614
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8615

    
8616
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8617
    if self.op.ip_check:
8618
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8619
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8620
                                   (self.check_ip, self.op.instance_name),
8621
                                   errors.ECODE_NOTUNIQUE)
8622

    
8623
    #### mac address generation
8624
    # By generating here the mac address both the allocator and the hooks get
8625
    # the real final mac address rather than the 'auto' or 'generate' value.
8626
    # There is a race condition between the generation and the instance object
8627
    # creation, which means that we know the mac is valid now, but we're not
8628
    # sure it will be when we actually add the instance. If things go bad
8629
    # adding the instance will abort because of a duplicate mac, and the
8630
    # creation job will fail.
8631
    for nic in self.nics:
8632
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8633
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8634

    
8635
    #### allocator run
8636

    
8637
    if self.op.iallocator is not None:
8638
      self._RunAllocator()
8639

    
8640
    #### node related checks
8641

    
8642
    # check primary node
8643
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8644
    assert self.pnode is not None, \
8645
      "Cannot retrieve locked node %s" % self.op.pnode
8646
    if pnode.offline:
8647
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8648
                                 pnode.name, errors.ECODE_STATE)
8649
    if pnode.drained:
8650
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8651
                                 pnode.name, errors.ECODE_STATE)
8652
    if not pnode.vm_capable:
8653
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8654
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8655

    
8656
    self.secondaries = []
8657

    
8658
    # mirror node verification
8659
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8660
      if self.op.snode == pnode.name:
8661
        raise errors.OpPrereqError("The secondary node cannot be the"
8662
                                   " primary node", errors.ECODE_INVAL)
8663
      _CheckNodeOnline(self, self.op.snode)
8664
      _CheckNodeNotDrained(self, self.op.snode)
8665
      _CheckNodeVmCapable(self, self.op.snode)
8666
      self.secondaries.append(self.op.snode)
8667

    
8668
    nodenames = [pnode.name] + self.secondaries
8669

    
8670
    if not self.adopt_disks:
8671
      # Check lv size requirements, if not adopting
8672
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8673
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8674

    
8675
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8676
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8677
                                disk[constants.IDISK_ADOPT])
8678
                     for disk in self.disks])
8679
      if len(all_lvs) != len(self.disks):
8680
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8681
                                   errors.ECODE_INVAL)
8682
      for lv_name in all_lvs:
8683
        try:
8684
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8685
          # to ReserveLV uses the same syntax
8686
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8687
        except errors.ReservationError:
8688
          raise errors.OpPrereqError("LV named %s used by another instance" %
8689
                                     lv_name, errors.ECODE_NOTUNIQUE)
8690

    
8691
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8692
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8693

    
8694
      node_lvs = self.rpc.call_lv_list([pnode.name],
8695
                                       vg_names.payload.keys())[pnode.name]
8696
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8697
      node_lvs = node_lvs.payload
8698

    
8699
      delta = all_lvs.difference(node_lvs.keys())
8700
      if delta:
8701
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8702
                                   utils.CommaJoin(delta),
8703
                                   errors.ECODE_INVAL)
8704
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8705
      if online_lvs:
8706
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8707
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8708
                                   errors.ECODE_STATE)
8709
      # update the size of disk based on what is found
8710
      for dsk in self.disks:
8711
        dsk[constants.IDISK_SIZE] = \
8712
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8713
                                        dsk[constants.IDISK_ADOPT])][0]))
8714

    
8715
    elif self.op.disk_template == constants.DT_BLOCK:
8716
      # Normalize and de-duplicate device paths
8717
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8718
                       for disk in self.disks])
8719
      if len(all_disks) != len(self.disks):
8720
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8721
                                   errors.ECODE_INVAL)
8722
      baddisks = [d for d in all_disks
8723
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8724
      if baddisks:
8725
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8726
                                   " cannot be adopted" %
8727
                                   (", ".join(baddisks),
8728
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8729
                                   errors.ECODE_INVAL)
8730

    
8731
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8732
                                            list(all_disks))[pnode.name]
8733
      node_disks.Raise("Cannot get block device information from node %s" %
8734
                       pnode.name)
8735
      node_disks = node_disks.payload
8736
      delta = all_disks.difference(node_disks.keys())
8737
      if delta:
8738
        raise errors.OpPrereqError("Missing block device(s): %s" %
8739
                                   utils.CommaJoin(delta),
8740
                                   errors.ECODE_INVAL)
8741
      for dsk in self.disks:
8742
        dsk[constants.IDISK_SIZE] = \
8743
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8744

    
8745
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8746

    
8747
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8748
    # check OS parameters (remotely)
8749
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8750

    
8751
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8752

    
8753
    # memory check on primary node
8754
    if self.op.start:
8755
      _CheckNodeFreeMemory(self, self.pnode.name,
8756
                           "creating instance %s" % self.op.instance_name,
8757
                           self.be_full[constants.BE_MEMORY],
8758
                           self.op.hypervisor)
8759

    
8760
    self.dry_run_result = list(nodenames)
8761

    
8762
  def Exec(self, feedback_fn):
8763
    """Create and add the instance to the cluster.
8764

8765
    """
8766
    instance = self.op.instance_name
8767
    pnode_name = self.pnode.name
8768

    
8769
    ht_kind = self.op.hypervisor
8770
    if ht_kind in constants.HTS_REQ_PORT:
8771
      network_port = self.cfg.AllocatePort()
8772
    else:
8773
      network_port = None
8774

    
8775
    disks = _GenerateDiskTemplate(self,
8776
                                  self.op.disk_template,
8777
                                  instance, pnode_name,
8778
                                  self.secondaries,
8779
                                  self.disks,
8780
                                  self.instance_file_storage_dir,
8781
                                  self.op.file_driver,
8782
                                  0,
8783
                                  feedback_fn)
8784

    
8785
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8786
                            primary_node=pnode_name,
8787
                            nics=self.nics, disks=disks,
8788
                            disk_template=self.op.disk_template,
8789
                            admin_up=False,
8790
                            network_port=network_port,
8791
                            beparams=self.op.beparams,
8792
                            hvparams=self.op.hvparams,
8793
                            hypervisor=self.op.hypervisor,
8794
                            osparams=self.op.osparams,
8795
                            )
8796

    
8797
    if self.op.tags:
8798
      for tag in self.op.tags:
8799
        iobj.AddTag(tag)
8800

    
8801
    if self.adopt_disks:
8802
      if self.op.disk_template == constants.DT_PLAIN:
8803
        # rename LVs to the newly-generated names; we need to construct
8804
        # 'fake' LV disks with the old data, plus the new unique_id
8805
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8806
        rename_to = []
8807
        for t_dsk, a_dsk in zip (tmp_disks, self.disks):
8808
          rename_to.append(t_dsk.logical_id)
8809
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8810
          self.cfg.SetDiskID(t_dsk, pnode_name)
8811
        result = self.rpc.call_blockdev_rename(pnode_name,
8812
                                               zip(tmp_disks, rename_to))
8813
        result.Raise("Failed to rename adoped LVs")
8814
    else:
8815
      feedback_fn("* creating instance disks...")
8816
      try:
8817
        _CreateDisks(self, iobj)
8818
      except errors.OpExecError:
8819
        self.LogWarning("Device creation failed, reverting...")
8820
        try:
8821
          _RemoveDisks(self, iobj)
8822
        finally:
8823
          self.cfg.ReleaseDRBDMinors(instance)
8824
          raise
8825

    
8826
    feedback_fn("adding instance %s to cluster config" % instance)
8827

    
8828
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8829

    
8830
    # Declare that we don't want to remove the instance lock anymore, as we've
8831
    # added the instance to the config
8832
    del self.remove_locks[locking.LEVEL_INSTANCE]
8833

    
8834
    if self.op.mode == constants.INSTANCE_IMPORT:
8835
      # Release unused nodes
8836
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8837
    else:
8838
      # Release all nodes
8839
      _ReleaseLocks(self, locking.LEVEL_NODE)
8840

    
8841
    disk_abort = False
8842
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8843
      feedback_fn("* wiping instance disks...")
8844
      try:
8845
        _WipeDisks(self, iobj)
8846
      except errors.OpExecError, err:
8847
        logging.exception("Wiping disks failed")
8848
        self.LogWarning("Wiping instance disks failed (%s)", err)
8849
        disk_abort = True
8850

    
8851
    if disk_abort:
8852
      # Something is already wrong with the disks, don't do anything else
8853
      pass
8854
    elif self.op.wait_for_sync:
8855
      disk_abort = not _WaitForSync(self, iobj)
8856
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8857
      # make sure the disks are not degraded (still sync-ing is ok)
8858
      time.sleep(15)
8859
      feedback_fn("* checking mirrors status")
8860
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8861
    else:
8862
      disk_abort = False
8863

    
8864
    if disk_abort:
8865
      _RemoveDisks(self, iobj)
8866
      self.cfg.RemoveInstance(iobj.name)
8867
      # Make sure the instance lock gets removed
8868
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8869
      raise errors.OpExecError("There are some degraded disks for"
8870
                               " this instance")
8871

    
8872
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8873
      if self.op.mode == constants.INSTANCE_CREATE:
8874
        if not self.op.no_install:
8875
          feedback_fn("* running the instance OS create scripts...")
8876
          # FIXME: pass debug option from opcode to backend
8877
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8878
                                                 self.op.debug_level)
8879
          result.Raise("Could not add os for instance %s"
8880
                       " on node %s" % (instance, pnode_name))
8881

    
8882
      elif self.op.mode == constants.INSTANCE_IMPORT:
8883
        feedback_fn("* running the instance OS import scripts...")
8884

    
8885
        transfers = []
8886

    
8887
        for idx, image in enumerate(self.src_images):
8888
          if not image:
8889
            continue
8890

    
8891
          # FIXME: pass debug option from opcode to backend
8892
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8893
                                             constants.IEIO_FILE, (image, ),
8894
                                             constants.IEIO_SCRIPT,
8895
                                             (iobj.disks[idx], idx),
8896
                                             None)
8897
          transfers.append(dt)
8898

    
8899
        import_result = \
8900
          masterd.instance.TransferInstanceData(self, feedback_fn,
8901
                                                self.op.src_node, pnode_name,
8902
                                                self.pnode.secondary_ip,
8903
                                                iobj, transfers)
8904
        if not compat.all(import_result):
8905
          self.LogWarning("Some disks for instance %s on node %s were not"
8906
                          " imported successfully" % (instance, pnode_name))
8907

    
8908
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8909
        feedback_fn("* preparing remote import...")
8910
        # The source cluster will stop the instance before attempting to make a
8911
        # connection. In some cases stopping an instance can take a long time,
8912
        # hence the shutdown timeout is added to the connection timeout.
8913
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8914
                           self.op.source_shutdown_timeout)
8915
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8916

    
8917
        assert iobj.primary_node == self.pnode.name
8918
        disk_results = \
8919
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8920
                                        self.source_x509_ca,
8921
                                        self._cds, timeouts)
8922
        if not compat.all(disk_results):
8923
          # TODO: Should the instance still be started, even if some disks
8924
          # failed to import (valid for local imports, too)?
8925
          self.LogWarning("Some disks for instance %s on node %s were not"
8926
                          " imported successfully" % (instance, pnode_name))
8927

    
8928
        # Run rename script on newly imported instance
8929
        assert iobj.name == instance
8930
        feedback_fn("Running rename script for %s" % instance)
8931
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8932
                                                   self.source_instance_name,
8933
                                                   self.op.debug_level)
8934
        if result.fail_msg:
8935
          self.LogWarning("Failed to run rename script for %s on node"
8936
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
8937

    
8938
      else:
8939
        # also checked in the prereq part
8940
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8941
                                     % self.op.mode)
8942

    
8943
    if self.op.start:
8944
      iobj.admin_up = True
8945
      self.cfg.Update(iobj, feedback_fn)
8946
      logging.info("Starting instance %s on node %s", instance, pnode_name)
8947
      feedback_fn("* starting instance...")
8948
      result = self.rpc.call_instance_start(pnode_name, iobj,
8949
                                            None, None, False)
8950
      result.Raise("Could not start instance")
8951

    
8952
    return list(iobj.all_nodes)
8953

    
8954

    
8955
class LUInstanceConsole(NoHooksLU):
8956
  """Connect to an instance's console.
8957

8958
  This is somewhat special in that it returns the command line that
8959
  you need to run on the master node in order to connect to the
8960
  console.
8961

8962
  """
8963
  REQ_BGL = False
8964

    
8965
  def ExpandNames(self):
8966
    self._ExpandAndLockInstance()
8967

    
8968
  def CheckPrereq(self):
8969
    """Check prerequisites.
8970

8971
    This checks that the instance is in the cluster.
8972

8973
    """
8974
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8975
    assert self.instance is not None, \
8976
      "Cannot retrieve locked instance %s" % self.op.instance_name
8977
    _CheckNodeOnline(self, self.instance.primary_node)
8978

    
8979
  def Exec(self, feedback_fn):
8980
    """Connect to the console of an instance
8981

8982
    """
8983
    instance = self.instance
8984
    node = instance.primary_node
8985

    
8986
    node_insts = self.rpc.call_instance_list([node],
8987
                                             [instance.hypervisor])[node]
8988
    node_insts.Raise("Can't get node information from %s" % node)
8989

    
8990
    if instance.name not in node_insts.payload:
8991
      if instance.admin_up:
8992
        state = constants.INSTST_ERRORDOWN
8993
      else:
8994
        state = constants.INSTST_ADMINDOWN
8995
      raise errors.OpExecError("Instance %s is not running (state %s)" %
8996
                               (instance.name, state))
8997

    
8998
    logging.debug("Connecting to console of %s on %s", instance.name, node)
8999

    
9000
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9001

    
9002

    
9003
def _GetInstanceConsole(cluster, instance):
9004
  """Returns console information for an instance.
9005

9006
  @type cluster: L{objects.Cluster}
9007
  @type instance: L{objects.Instance}
9008
  @rtype: dict
9009

9010
  """
9011
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9012
  # beparams and hvparams are passed separately, to avoid editing the
9013
  # instance and then saving the defaults in the instance itself.
9014
  hvparams = cluster.FillHV(instance)
9015
  beparams = cluster.FillBE(instance)
9016
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9017

    
9018
  assert console.instance == instance.name
9019
  assert console.Validate()
9020

    
9021
  return console.ToDict()
9022

    
9023

    
9024
class LUInstanceReplaceDisks(LogicalUnit):
9025
  """Replace the disks of an instance.
9026

9027
  """
9028
  HPATH = "mirrors-replace"
9029
  HTYPE = constants.HTYPE_INSTANCE
9030
  REQ_BGL = False
9031

    
9032
  def CheckArguments(self):
9033
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9034
                                  self.op.iallocator)
9035

    
9036
  def ExpandNames(self):
9037
    self._ExpandAndLockInstance()
9038

    
9039
    assert locking.LEVEL_NODE not in self.needed_locks
9040
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9041

    
9042
    assert self.op.iallocator is None or self.op.remote_node is None, \
9043
      "Conflicting options"
9044

    
9045
    if self.op.remote_node is not None:
9046
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9047

    
9048
      # Warning: do not remove the locking of the new secondary here
9049
      # unless DRBD8.AddChildren is changed to work in parallel;
9050
      # currently it doesn't since parallel invocations of
9051
      # FindUnusedMinor will conflict
9052
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9053
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9054
    else:
9055
      self.needed_locks[locking.LEVEL_NODE] = []
9056
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9057

    
9058
      if self.op.iallocator is not None:
9059
        # iallocator will select a new node in the same group
9060
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9061

    
9062
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9063
                                   self.op.iallocator, self.op.remote_node,
9064
                                   self.op.disks, False, self.op.early_release)
9065

    
9066
    self.tasklets = [self.replacer]
9067

    
9068
  def DeclareLocks(self, level):
9069
    if level == locking.LEVEL_NODEGROUP:
9070
      assert self.op.remote_node is None
9071
      assert self.op.iallocator is not None
9072
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9073

    
9074
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9075
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9076
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9077

    
9078
    elif level == locking.LEVEL_NODE:
9079
      if self.op.iallocator is not None:
9080
        assert self.op.remote_node is None
9081
        assert not self.needed_locks[locking.LEVEL_NODE]
9082

    
9083
        # Lock member nodes of all locked groups
9084
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9085
          for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
9086
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9087
      else:
9088
        self._LockInstancesNodes()
9089

    
9090
  def BuildHooksEnv(self):
9091
    """Build hooks env.
9092

9093
    This runs on the master, the primary and all the secondaries.
9094

9095
    """
9096
    instance = self.replacer.instance
9097
    env = {
9098
      "MODE": self.op.mode,
9099
      "NEW_SECONDARY": self.op.remote_node,
9100
      "OLD_SECONDARY": instance.secondary_nodes[0],
9101
      }
9102
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9103
    return env
9104

    
9105
  def BuildHooksNodes(self):
9106
    """Build hooks nodes.
9107

9108
    """
9109
    instance = self.replacer.instance
9110
    nl = [
9111
      self.cfg.GetMasterNode(),
9112
      instance.primary_node,
9113
      ]
9114
    if self.op.remote_node is not None:
9115
      nl.append(self.op.remote_node)
9116
    return nl, nl
9117

    
9118
  def CheckPrereq(self):
9119
    """Check prerequisites.
9120

9121
    """
9122
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9123
            self.op.iallocator is None)
9124

    
9125
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
9126
    if owned_groups:
9127
      groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9128
      if owned_groups != groups:
9129
        raise errors.OpExecError("Node groups used by instance '%s' changed"
9130
                                 " since lock was acquired, current list is %r,"
9131
                                 " used to be '%s'" %
9132
                                 (self.op.instance_name,
9133
                                  utils.CommaJoin(groups),
9134
                                  utils.CommaJoin(owned_groups)))
9135

    
9136
    return LogicalUnit.CheckPrereq(self)
9137

    
9138

    
9139
class TLReplaceDisks(Tasklet):
9140
  """Replaces disks for an instance.
9141

9142
  Note: Locking is not within the scope of this class.
9143

9144
  """
9145
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9146
               disks, delay_iallocator, early_release):
9147
    """Initializes this class.
9148

9149
    """
9150
    Tasklet.__init__(self, lu)
9151

    
9152
    # Parameters
9153
    self.instance_name = instance_name
9154
    self.mode = mode
9155
    self.iallocator_name = iallocator_name
9156
    self.remote_node = remote_node
9157
    self.disks = disks
9158
    self.delay_iallocator = delay_iallocator
9159
    self.early_release = early_release
9160

    
9161
    # Runtime data
9162
    self.instance = None
9163
    self.new_node = None
9164
    self.target_node = None
9165
    self.other_node = None
9166
    self.remote_node_info = None
9167
    self.node_secondary_ip = None
9168

    
9169
  @staticmethod
9170
  def CheckArguments(mode, remote_node, iallocator):
9171
    """Helper function for users of this class.
9172

9173
    """
9174
    # check for valid parameter combination
9175
    if mode == constants.REPLACE_DISK_CHG:
9176
      if remote_node is None and iallocator is None:
9177
        raise errors.OpPrereqError("When changing the secondary either an"
9178
                                   " iallocator script must be used or the"
9179
                                   " new node given", errors.ECODE_INVAL)
9180

    
9181
      if remote_node is not None and iallocator is not None:
9182
        raise errors.OpPrereqError("Give either the iallocator or the new"
9183
                                   " secondary, not both", errors.ECODE_INVAL)
9184

    
9185
    elif remote_node is not None or iallocator is not None:
9186
      # Not replacing the secondary
9187
      raise errors.OpPrereqError("The iallocator and new node options can"
9188
                                 " only be used when changing the"
9189
                                 " secondary node", errors.ECODE_INVAL)
9190

    
9191
  @staticmethod
9192
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9193
    """Compute a new secondary node using an IAllocator.
9194

9195
    """
9196
    ial = IAllocator(lu.cfg, lu.rpc,
9197
                     mode=constants.IALLOCATOR_MODE_RELOC,
9198
                     name=instance_name,
9199
                     relocate_from=relocate_from)
9200

    
9201
    ial.Run(iallocator_name)
9202

    
9203
    if not ial.success:
9204
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9205
                                 " %s" % (iallocator_name, ial.info),
9206
                                 errors.ECODE_NORES)
9207

    
9208
    if len(ial.result) != ial.required_nodes:
9209
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9210
                                 " of nodes (%s), required %s" %
9211
                                 (iallocator_name,
9212
                                  len(ial.result), ial.required_nodes),
9213
                                 errors.ECODE_FAULT)
9214

    
9215
    remote_node_name = ial.result[0]
9216

    
9217
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9218
               instance_name, remote_node_name)
9219

    
9220
    return remote_node_name
9221

    
9222
  def _FindFaultyDisks(self, node_name):
9223
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9224
                                    node_name, True)
9225

    
9226
  def _CheckDisksActivated(self, instance):
9227
    """Checks if the instance disks are activated.
9228

9229
    @param instance: The instance to check disks
9230
    @return: True if they are activated, False otherwise
9231

9232
    """
9233
    nodes = instance.all_nodes
9234

    
9235
    for idx, dev in enumerate(instance.disks):
9236
      for node in nodes:
9237
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9238
        self.cfg.SetDiskID(dev, node)
9239

    
9240
        result = self.rpc.call_blockdev_find(node, dev)
9241

    
9242
        if result.offline:
9243
          continue
9244
        elif result.fail_msg or not result.payload:
9245
          return False
9246

    
9247
    return True
9248

    
9249
  def CheckPrereq(self):
9250
    """Check prerequisites.
9251

9252
    This checks that the instance is in the cluster.
9253

9254
    """
9255
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9256
    assert instance is not None, \
9257
      "Cannot retrieve locked instance %s" % self.instance_name
9258

    
9259
    if instance.disk_template != constants.DT_DRBD8:
9260
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9261
                                 " instances", errors.ECODE_INVAL)
9262

    
9263
    if len(instance.secondary_nodes) != 1:
9264
      raise errors.OpPrereqError("The instance has a strange layout,"
9265
                                 " expected one secondary but found %d" %
9266
                                 len(instance.secondary_nodes),
9267
                                 errors.ECODE_FAULT)
9268

    
9269
    if not self.delay_iallocator:
9270
      self._CheckPrereq2()
9271

    
9272
  def _CheckPrereq2(self):
9273
    """Check prerequisites, second part.
9274

9275
    This function should always be part of CheckPrereq. It was separated and is
9276
    now called from Exec because during node evacuation iallocator was only
9277
    called with an unmodified cluster model, not taking planned changes into
9278
    account.
9279

9280
    """
9281
    instance = self.instance
9282
    secondary_node = instance.secondary_nodes[0]
9283

    
9284
    if self.iallocator_name is None:
9285
      remote_node = self.remote_node
9286
    else:
9287
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9288
                                       instance.name, instance.secondary_nodes)
9289

    
9290
    if remote_node is None:
9291
      self.remote_node_info = None
9292
    else:
9293
      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
9294
             "Remote node '%s' is not locked" % remote_node
9295

    
9296
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9297
      assert self.remote_node_info is not None, \
9298
        "Cannot retrieve locked node %s" % remote_node
9299

    
9300
    if remote_node == self.instance.primary_node:
9301
      raise errors.OpPrereqError("The specified node is the primary node of"
9302
                                 " the instance", errors.ECODE_INVAL)
9303

    
9304
    if remote_node == secondary_node:
9305
      raise errors.OpPrereqError("The specified node is already the"
9306
                                 " secondary node of the instance",
9307
                                 errors.ECODE_INVAL)
9308

    
9309
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9310
                                    constants.REPLACE_DISK_CHG):
9311
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9312
                                 errors.ECODE_INVAL)
9313

    
9314
    if self.mode == constants.REPLACE_DISK_AUTO:
9315
      if not self._CheckDisksActivated(instance):
9316
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9317
                                   " first" % self.instance_name,
9318
                                   errors.ECODE_STATE)
9319
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9320
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9321

    
9322
      if faulty_primary and faulty_secondary:
9323
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9324
                                   " one node and can not be repaired"
9325
                                   " automatically" % self.instance_name,
9326
                                   errors.ECODE_STATE)
9327

    
9328
      if faulty_primary:
9329
        self.disks = faulty_primary
9330
        self.target_node = instance.primary_node
9331
        self.other_node = secondary_node
9332
        check_nodes = [self.target_node, self.other_node]
9333
      elif faulty_secondary:
9334
        self.disks = faulty_secondary
9335
        self.target_node = secondary_node
9336
        self.other_node = instance.primary_node
9337
        check_nodes = [self.target_node, self.other_node]
9338
      else:
9339
        self.disks = []
9340
        check_nodes = []
9341

    
9342
    else:
9343
      # Non-automatic modes
9344
      if self.mode == constants.REPLACE_DISK_PRI:
9345
        self.target_node = instance.primary_node
9346
        self.other_node = secondary_node
9347
        check_nodes = [self.target_node, self.other_node]
9348

    
9349
      elif self.mode == constants.REPLACE_DISK_SEC:
9350
        self.target_node = secondary_node
9351
        self.other_node = instance.primary_node
9352
        check_nodes = [self.target_node, self.other_node]
9353

    
9354
      elif self.mode == constants.REPLACE_DISK_CHG:
9355
        self.new_node = remote_node
9356
        self.other_node = instance.primary_node
9357
        self.target_node = secondary_node
9358
        check_nodes = [self.new_node, self.other_node]
9359

    
9360
        _CheckNodeNotDrained(self.lu, remote_node)
9361
        _CheckNodeVmCapable(self.lu, remote_node)
9362

    
9363
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9364
        assert old_node_info is not None
9365
        if old_node_info.offline and not self.early_release:
9366
          # doesn't make sense to delay the release
9367
          self.early_release = True
9368
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9369
                          " early-release mode", secondary_node)
9370

    
9371
      else:
9372
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9373
                                     self.mode)
9374

    
9375
      # If not specified all disks should be replaced
9376
      if not self.disks:
9377
        self.disks = range(len(self.instance.disks))
9378

    
9379
    for node in check_nodes:
9380
      _CheckNodeOnline(self.lu, node)
9381

    
9382
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9383
                                                          self.other_node,
9384
                                                          self.target_node]
9385
                              if node_name is not None)
9386

    
9387
    # Release unneeded node locks
9388
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9389

    
9390
    # Release any owned node group
9391
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9392
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9393

    
9394
    # Check whether disks are valid
9395
    for disk_idx in self.disks:
9396
      instance.FindDisk(disk_idx)
9397

    
9398
    # Get secondary node IP addresses
9399
    self.node_secondary_ip = \
9400
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
9401
           for node_name in touched_nodes)
9402

    
9403
  def Exec(self, feedback_fn):
9404
    """Execute disk replacement.
9405

9406
    This dispatches the disk replacement to the appropriate handler.
9407

9408
    """
9409
    if self.delay_iallocator:
9410
      self._CheckPrereq2()
9411

    
9412
    if __debug__:
9413
      # Verify owned locks before starting operation
9414
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9415
      assert set(owned_locks) == set(self.node_secondary_ip), \
9416
          ("Incorrect node locks, owning %s, expected %s" %
9417
           (owned_locks, self.node_secondary_ip.keys()))
9418

    
9419
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9420
      assert list(owned_locks) == [self.instance_name], \
9421
          "Instance '%s' not locked" % self.instance_name
9422

    
9423
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9424
          "Should not own any node group lock at this point"
9425

    
9426
    if not self.disks:
9427
      feedback_fn("No disks need replacement")
9428
      return
9429

    
9430
    feedback_fn("Replacing disk(s) %s for %s" %
9431
                (utils.CommaJoin(self.disks), self.instance.name))
9432

    
9433
    activate_disks = (not self.instance.admin_up)
9434

    
9435
    # Activate the instance disks if we're replacing them on a down instance
9436
    if activate_disks:
9437
      _StartInstanceDisks(self.lu, self.instance, True)
9438

    
9439
    try:
9440
      # Should we replace the secondary node?
9441
      if self.new_node is not None:
9442
        fn = self._ExecDrbd8Secondary
9443
      else:
9444
        fn = self._ExecDrbd8DiskOnly
9445

    
9446
      result = fn(feedback_fn)
9447
    finally:
9448
      # Deactivate the instance disks if we're replacing them on a
9449
      # down instance
9450
      if activate_disks:
9451
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9452

    
9453
    if __debug__:
9454
      # Verify owned locks
9455
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9456
      nodes = frozenset(self.node_secondary_ip)
9457
      assert ((self.early_release and not owned_locks) or
9458
              (not self.early_release and not (set(owned_locks) - nodes))), \
9459
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9460
         " nodes=%r" % (self.early_release, owned_locks, nodes))
9461

    
9462
    return result
9463

    
9464
  def _CheckVolumeGroup(self, nodes):
9465
    self.lu.LogInfo("Checking volume groups")
9466

    
9467
    vgname = self.cfg.GetVGName()
9468

    
9469
    # Make sure volume group exists on all involved nodes
9470
    results = self.rpc.call_vg_list(nodes)
9471
    if not results:
9472
      raise errors.OpExecError("Can't list volume groups on the nodes")
9473

    
9474
    for node in nodes:
9475
      res = results[node]
9476
      res.Raise("Error checking node %s" % node)
9477
      if vgname not in res.payload:
9478
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9479
                                 (vgname, node))
9480

    
9481
  def _CheckDisksExistence(self, nodes):
9482
    # Check disk existence
9483
    for idx, dev in enumerate(self.instance.disks):
9484
      if idx not in self.disks:
9485
        continue
9486

    
9487
      for node in nodes:
9488
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9489
        self.cfg.SetDiskID(dev, node)
9490

    
9491
        result = self.rpc.call_blockdev_find(node, dev)
9492

    
9493
        msg = result.fail_msg
9494
        if msg or not result.payload:
9495
          if not msg:
9496
            msg = "disk not found"
9497
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9498
                                   (idx, node, msg))
9499

    
9500
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9501
    for idx, dev in enumerate(self.instance.disks):
9502
      if idx not in self.disks:
9503
        continue
9504

    
9505
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9506
                      (idx, node_name))
9507

    
9508
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9509
                                   ldisk=ldisk):
9510
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9511
                                 " replace disks for instance %s" %
9512
                                 (node_name, self.instance.name))
9513

    
9514
  def _CreateNewStorage(self, node_name):
9515
    """Create new storage on the primary or secondary node.
9516

9517
    This is only used for same-node replaces, not for changing the
9518
    secondary node, hence we don't want to modify the existing disk.
9519

9520
    """
9521
    iv_names = {}
9522

    
9523
    for idx, dev in enumerate(self.instance.disks):
9524
      if idx not in self.disks:
9525
        continue
9526

    
9527
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9528

    
9529
      self.cfg.SetDiskID(dev, node_name)
9530

    
9531
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9532
      names = _GenerateUniqueNames(self.lu, lv_names)
9533

    
9534
      vg_data = dev.children[0].logical_id[0]
9535
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9536
                             logical_id=(vg_data, names[0]))
9537
      vg_meta = dev.children[1].logical_id[0]
9538
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9539
                             logical_id=(vg_meta, names[1]))
9540

    
9541
      new_lvs = [lv_data, lv_meta]
9542
      old_lvs = [child.Copy() for child in dev.children]
9543
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9544

    
9545
      # we pass force_create=True to force the LVM creation
9546
      for new_lv in new_lvs:
9547
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9548
                        _GetInstanceInfoText(self.instance), False)
9549

    
9550
    return iv_names
9551

    
9552
  def _CheckDevices(self, node_name, iv_names):
9553
    for name, (dev, _, _) in iv_names.iteritems():
9554
      self.cfg.SetDiskID(dev, node_name)
9555

    
9556
      result = self.rpc.call_blockdev_find(node_name, dev)
9557

    
9558
      msg = result.fail_msg
9559
      if msg or not result.payload:
9560
        if not msg:
9561
          msg = "disk not found"
9562
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9563
                                 (name, msg))
9564

    
9565
      if result.payload.is_degraded:
9566
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9567

    
9568
  def _RemoveOldStorage(self, node_name, iv_names):
9569
    for name, (_, old_lvs, _) in iv_names.iteritems():
9570
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9571

    
9572
      for lv in old_lvs:
9573
        self.cfg.SetDiskID(lv, node_name)
9574

    
9575
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9576
        if msg:
9577
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9578
                             hint="remove unused LVs manually")
9579

    
9580
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9581
    """Replace a disk on the primary or secondary for DRBD 8.
9582

9583
    The algorithm for replace is quite complicated:
9584

9585
      1. for each disk to be replaced:
9586

9587
        1. create new LVs on the target node with unique names
9588
        1. detach old LVs from the drbd device
9589
        1. rename old LVs to name_replaced.<time_t>
9590
        1. rename new LVs to old LVs
9591
        1. attach the new LVs (with the old names now) to the drbd device
9592

9593
      1. wait for sync across all devices
9594

9595
      1. for each modified disk:
9596

9597
        1. remove old LVs (which have the name name_replaced.<time_t>)
9598

9599
    Failures are not very well handled.
9600

9601
    """
9602
    steps_total = 6
9603

    
9604
    # Step: check device activation
9605
    self.lu.LogStep(1, steps_total, "Check device existence")
9606
    self._CheckDisksExistence([self.other_node, self.target_node])
9607
    self._CheckVolumeGroup([self.target_node, self.other_node])
9608

    
9609
    # Step: check other node consistency
9610
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9611
    self._CheckDisksConsistency(self.other_node,
9612
                                self.other_node == self.instance.primary_node,
9613
                                False)
9614

    
9615
    # Step: create new storage
9616
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9617
    iv_names = self._CreateNewStorage(self.target_node)
9618

    
9619
    # Step: for each lv, detach+rename*2+attach
9620
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9621
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9622
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9623

    
9624
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9625
                                                     old_lvs)
9626
      result.Raise("Can't detach drbd from local storage on node"
9627
                   " %s for device %s" % (self.target_node, dev.iv_name))
9628
      #dev.children = []
9629
      #cfg.Update(instance)
9630

    
9631
      # ok, we created the new LVs, so now we know we have the needed
9632
      # storage; as such, we proceed on the target node to rename
9633
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9634
      # using the assumption that logical_id == physical_id (which in
9635
      # turn is the unique_id on that node)
9636

    
9637
      # FIXME(iustin): use a better name for the replaced LVs
9638
      temp_suffix = int(time.time())
9639
      ren_fn = lambda d, suff: (d.physical_id[0],
9640
                                d.physical_id[1] + "_replaced-%s" % suff)
9641

    
9642
      # Build the rename list based on what LVs exist on the node
9643
      rename_old_to_new = []
9644
      for to_ren in old_lvs:
9645
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9646
        if not result.fail_msg and result.payload:
9647
          # device exists
9648
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9649

    
9650
      self.lu.LogInfo("Renaming the old LVs on the target node")
9651
      result = self.rpc.call_blockdev_rename(self.target_node,
9652
                                             rename_old_to_new)
9653
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9654

    
9655
      # Now we rename the new LVs to the old LVs
9656
      self.lu.LogInfo("Renaming the new LVs on the target node")
9657
      rename_new_to_old = [(new, old.physical_id)
9658
                           for old, new in zip(old_lvs, new_lvs)]
9659
      result = self.rpc.call_blockdev_rename(self.target_node,
9660
                                             rename_new_to_old)
9661
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9662

    
9663
      # Intermediate steps of in memory modifications
9664
      for old, new in zip(old_lvs, new_lvs):
9665
        new.logical_id = old.logical_id
9666
        self.cfg.SetDiskID(new, self.target_node)
9667

    
9668
      # We need to modify old_lvs so that removal later removes the
9669
      # right LVs, not the newly added ones; note that old_lvs is a
9670
      # copy here
9671
      for disk in old_lvs:
9672
        disk.logical_id = ren_fn(disk, temp_suffix)
9673
        self.cfg.SetDiskID(disk, self.target_node)
9674

    
9675
      # Now that the new lvs have the old name, we can add them to the device
9676
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9677
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9678
                                                  new_lvs)
9679
      msg = result.fail_msg
9680
      if msg:
9681
        for new_lv in new_lvs:
9682
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9683
                                               new_lv).fail_msg
9684
          if msg2:
9685
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9686
                               hint=("cleanup manually the unused logical"
9687
                                     "volumes"))
9688
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9689

    
9690
    cstep = 5
9691
    if self.early_release:
9692
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9693
      cstep += 1
9694
      self._RemoveOldStorage(self.target_node, iv_names)
9695
      # WARNING: we release both node locks here, do not do other RPCs
9696
      # than WaitForSync to the primary node
9697
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9698
                    names=[self.target_node, self.other_node])
9699

    
9700
    # Wait for sync
9701
    # This can fail as the old devices are degraded and _WaitForSync
9702
    # does a combined result over all disks, so we don't check its return value
9703
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9704
    cstep += 1
9705
    _WaitForSync(self.lu, self.instance)
9706

    
9707
    # Check all devices manually
9708
    self._CheckDevices(self.instance.primary_node, iv_names)
9709

    
9710
    # Step: remove old storage
9711
    if not self.early_release:
9712
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9713
      cstep += 1
9714
      self._RemoveOldStorage(self.target_node, iv_names)
9715

    
9716
  def _ExecDrbd8Secondary(self, feedback_fn):
9717
    """Replace the secondary node for DRBD 8.
9718

9719
    The algorithm for replace is quite complicated:
9720
      - for all disks of the instance:
9721
        - create new LVs on the new node with same names
9722
        - shutdown the drbd device on the old secondary
9723
        - disconnect the drbd network on the primary
9724
        - create the drbd device on the new secondary
9725
        - network attach the drbd on the primary, using an artifice:
9726
          the drbd code for Attach() will connect to the network if it
9727
          finds a device which is connected to the good local disks but
9728
          not network enabled
9729
      - wait for sync across all devices
9730
      - remove all disks from the old secondary
9731

9732
    Failures are not very well handled.
9733

9734
    """
9735
    steps_total = 6
9736

    
9737
    # Step: check device activation
9738
    self.lu.LogStep(1, steps_total, "Check device existence")
9739
    self._CheckDisksExistence([self.instance.primary_node])
9740
    self._CheckVolumeGroup([self.instance.primary_node])
9741

    
9742
    # Step: check other node consistency
9743
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9744
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9745

    
9746
    # Step: create new storage
9747
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9748
    for idx, dev in enumerate(self.instance.disks):
9749
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9750
                      (self.new_node, idx))
9751
      # we pass force_create=True to force LVM creation
9752
      for new_lv in dev.children:
9753
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9754
                        _GetInstanceInfoText(self.instance), False)
9755

    
9756
    # Step 4: drbd minors and drbd setup changes
9757
    # after this, we must manually remove the drbd minors on both the
9758
    # error and the success paths
9759
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9760
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9761
                                         for dev in self.instance.disks],
9762
                                        self.instance.name)
9763
    logging.debug("Allocated minors %r", minors)
9764

    
9765
    iv_names = {}
9766
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9767
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9768
                      (self.new_node, idx))
9769
      # create new devices on new_node; note that we create two IDs:
9770
      # one without port, so the drbd will be activated without
9771
      # networking information on the new node at this stage, and one
9772
      # with network, for the latter activation in step 4
9773
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9774
      if self.instance.primary_node == o_node1:
9775
        p_minor = o_minor1
9776
      else:
9777
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9778
        p_minor = o_minor2
9779

    
9780
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9781
                      p_minor, new_minor, o_secret)
9782
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9783
                    p_minor, new_minor, o_secret)
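      # Illustrative values (assumptions, not from a real cluster): a DRBD8
      # logical_id is the 6-tuple (nodeA, nodeB, port, minorA, minorB, secret),
      # so with primary "node1" and new secondary "node3" this could be
      #   new_alone_id == ("node1", "node3", None, 0, 1, "s3cr3t")
      #   new_net_id   == ("node1", "node3", 11000, 0, 1, "s3cr3t")
      # The port-less tuple brings the device up standalone on the new node;
      # the networked one is kept in iv_names for the attach step further down.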
9784

    
9785
      iv_names[idx] = (dev, dev.children, new_net_id)
9786
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9787
                    new_net_id)
9788
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9789
                              logical_id=new_alone_id,
9790
                              children=dev.children,
9791
                              size=dev.size)
9792
      try:
9793
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9794
                              _GetInstanceInfoText(self.instance), False)
9795
      except errors.GenericError:
9796
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9797
        raise
9798

    
9799
    # We have new devices, shutdown the drbd on the old secondary
9800
    for idx, dev in enumerate(self.instance.disks):
9801
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9802
      self.cfg.SetDiskID(dev, self.target_node)
9803
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9804
      if msg:
9805
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9806
                           "node: %s" % (idx, msg),
9807
                           hint=("Please cleanup this device manually as"
9808
                                 " soon as possible"))
9809

    
9810
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9811
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9812
                                               self.node_secondary_ip,
9813
                                               self.instance.disks)\
9814
                                              [self.instance.primary_node]
9815

    
9816
    msg = result.fail_msg
9817
    if msg:
9818
      # detaches didn't succeed (unlikely)
9819
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9820
      raise errors.OpExecError("Can't detach the disks from the network on"
9821
                               " old node: %s" % (msg,))
9822

    
9823
    # if we managed to detach at least one, we update all the disks of
9824
    # the instance to point to the new secondary
9825
    self.lu.LogInfo("Updating instance configuration")
9826
    for dev, _, new_logical_id in iv_names.itervalues():
9827
      dev.logical_id = new_logical_id
9828
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9829

    
9830
    self.cfg.Update(self.instance, feedback_fn)
9831

    
9832
    # and now perform the drbd attach
9833
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9834
                    " (standalone => connected)")
9835
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9836
                                            self.new_node],
9837
                                           self.node_secondary_ip,
9838
                                           self.instance.disks,
9839
                                           self.instance.name,
9840
                                           False)
9841
    for to_node, to_result in result.items():
9842
      msg = to_result.fail_msg
9843
      if msg:
9844
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9845
                           to_node, msg,
9846
                           hint=("please do a gnt-instance info to see the"
9847
                                 " status of disks"))
9848
    cstep = 5
9849
    if self.early_release:
9850
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9851
      cstep += 1
9852
      self._RemoveOldStorage(self.target_node, iv_names)
9853
      # WARNING: we release all node locks here, do not do other RPCs
9854
      # than WaitForSync to the primary node
9855
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9856
                    names=[self.instance.primary_node,
9857
                           self.target_node,
9858
                           self.new_node])
9859

    
9860
    # Wait for sync
9861
    # This can fail as the old devices are degraded and _WaitForSync
9862
    # does a combined result over all disks, so we don't check its return value
9863
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9864
    cstep += 1
9865
    _WaitForSync(self.lu, self.instance)
9866

    
9867
    # Check all devices manually
9868
    self._CheckDevices(self.instance.primary_node, iv_names)
9869

    
9870
    # Step: remove old storage
9871
    if not self.early_release:
9872
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9873
      self._RemoveOldStorage(self.target_node, iv_names)
9874

    
9875

    
9876
class LURepairNodeStorage(NoHooksLU):
9877
  """Repairs the volume group on a node.
9878

9879
  """
9880
  REQ_BGL = False
9881

    
9882
  def CheckArguments(self):
9883
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9884

    
9885
    storage_type = self.op.storage_type
9886

    
9887
    if (constants.SO_FIX_CONSISTENCY not in
9888
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9889
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
9890
                                 " repaired" % storage_type,
9891
                                 errors.ECODE_INVAL)
9892

    
9893
  def ExpandNames(self):
9894
    self.needed_locks = {
9895
      locking.LEVEL_NODE: [self.op.node_name],
9896
      }
9897

    
9898
  def _CheckFaultyDisks(self, instance, node_name):
9899
    """Ensure faulty disks abort the opcode or at least warn."""
9900
    try:
9901
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9902
                                  node_name, True):
9903
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9904
                                   " node '%s'" % (instance.name, node_name),
9905
                                   errors.ECODE_STATE)
9906
    except errors.OpPrereqError, err:
9907
      if self.op.ignore_consistency:
9908
        self.proc.LogWarning(str(err.args[0]))
9909
      else:
9910
        raise
9911

    
9912
  def CheckPrereq(self):
9913
    """Check prerequisites.
9914

9915
    """
9916
    # Check whether any instance on this node has faulty disks
9917
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9918
      if not inst.admin_up:
9919
        continue
9920
      check_nodes = set(inst.all_nodes)
9921
      check_nodes.discard(self.op.node_name)
9922
      for inst_node_name in check_nodes:
9923
        self._CheckFaultyDisks(inst, inst_node_name)
9924

    
9925
  def Exec(self, feedback_fn):
9926
    feedback_fn("Repairing storage unit '%s' on %s ..." %
9927
                (self.op.name, self.op.node_name))
9928

    
9929
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9930
    result = self.rpc.call_storage_execute(self.op.node_name,
9931
                                           self.op.storage_type, st_args,
9932
                                           self.op.name,
9933
                                           constants.SO_FIX_CONSISTENCY)
9934
    result.Raise("Failed to repair storage unit '%s' on %s" %
9935
                 (self.op.name, self.op.node_name))
9936

    
9937

    
9938
class LUNodeEvacuate(NoHooksLU):
9939
  """Evacuates instances off a list of nodes.
9940

9941
  """
9942
  REQ_BGL = False
9943

    
9944
  def CheckArguments(self):
9945
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9946

    
9947
  def ExpandNames(self):
9948
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9949

    
9950
    if self.op.remote_node is not None:
9951
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9952
      assert self.op.remote_node
9953

    
9954
      if self.op.remote_node == self.op.node_name:
9955
        raise errors.OpPrereqError("Can not use evacuated node as a new"
9956
                                   " secondary node", errors.ECODE_INVAL)
9957

    
9958
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
9959
        raise errors.OpPrereqError("Without the use of an iallocator only"
9960
                                   " secondary instances can be evacuated",
9961
                                   errors.ECODE_INVAL)
9962

    
9963
    # Declare locks
9964
    self.share_locks = _ShareAll()
9965
    self.needed_locks = {
9966
      locking.LEVEL_INSTANCE: [],
9967
      locking.LEVEL_NODEGROUP: [],
9968
      locking.LEVEL_NODE: [],
9969
      }
9970

    
9971
    if self.op.remote_node is None:
9972
      # Iallocator will choose any node(s) in the same group
9973
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
9974
    else:
9975
      group_nodes = frozenset([self.op.remote_node])
9976

    
9977
    # Determine nodes to be locked
9978
    self.lock_nodes = set([self.op.node_name]) | group_nodes
9979

    
9980
  def _DetermineInstances(self):
9981
    """Builds list of instances to operate on.
9982

9983
    """
9984
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
9985

    
9986
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
9987
      # Primary instances only
9988
      inst_fn = _GetNodePrimaryInstances
9989
      assert self.op.remote_node is None, \
9990
        "Evacuating primary instances requires iallocator"
9991
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
9992
      # Secondary instances only
9993
      inst_fn = _GetNodeSecondaryInstances
9994
    else:
9995
      # All instances
9996
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
9997
      inst_fn = _GetNodeInstances
9998

    
9999
    return inst_fn(self.cfg, self.op.node_name)
10000

    
10001
  def DeclareLocks(self, level):
10002
    if level == locking.LEVEL_INSTANCE:
10003
      # Lock instances optimistically, needs verification once node and group
10004
      # locks have been acquired
10005
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10006
        set(i.name for i in self._DetermineInstances())
10007

    
10008
    elif level == locking.LEVEL_NODEGROUP:
10009
      # Lock node groups optimistically, needs verification once nodes have
10010
      # been acquired
10011
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10012
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10013

    
10014
    elif level == locking.LEVEL_NODE:
10015
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10016

    
10017
  def CheckPrereq(self):
10018
    # Verify locks
10019
    owned_instances = self.glm.list_owned(locking.LEVEL_INSTANCE)
10020
    owned_nodes = self.glm.list_owned(locking.LEVEL_NODE)
10021
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
10022

    
10023
    assert owned_nodes == self.lock_nodes
10024

    
10025
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10026
    if owned_groups != wanted_groups:
10027
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10028
                               " current groups are '%s', used to be '%s'" %
10029
                               (utils.CommaJoin(wanted_groups),
10030
                                utils.CommaJoin(owned_groups)))
10031

    
10032
    # Determine affected instances
10033
    self.instances = self._DetermineInstances()
10034
    self.instance_names = [i.name for i in self.instances]
10035

    
10036
    if set(self.instance_names) != owned_instances:
10037
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10038
                               " were acquired, current instances are '%s',"
10039
                               " used to be '%s'" %
10040
                               (self.op.node_name,
10041
                                utils.CommaJoin(self.instance_names),
10042
                                utils.CommaJoin(owned_instances)))
10043

    
10044
    if self.instance_names:
10045
      self.LogInfo("Evacuating instances from node '%s': %s",
10046
                   self.op.node_name,
10047
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10048
    else:
10049
      self.LogInfo("No instances to evacuate from node '%s'",
10050
                   self.op.node_name)
10051

    
10052
    if self.op.remote_node is not None:
10053
      for i in self.instances:
10054
        if i.primary_node == self.op.remote_node:
10055
          raise errors.OpPrereqError("Node %s is the primary node of"
10056
                                     " instance %s, cannot use it as"
10057
                                     " secondary" %
10058
                                     (self.op.remote_node, i.name),
10059
                                     errors.ECODE_INVAL)
10060

    
10061
  def Exec(self, feedback_fn):
10062
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10063

    
10064
    if not self.instance_names:
10065
      # No instances to evacuate
10066
      jobs = []
10067

    
10068
    elif self.op.iallocator is not None:
10069
      # TODO: Implement relocation to other group
10070
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10071
                       evac_mode=self.op.mode,
10072
                       instances=list(self.instance_names))
10073

    
10074
      ial.Run(self.op.iallocator)
10075

    
10076
      if not ial.success:
10077
        raise errors.OpPrereqError("Can't compute node evacuation using"
10078
                                   " iallocator '%s': %s" %
10079
                                   (self.op.iallocator, ial.info),
10080
                                   errors.ECODE_NORES)
10081

    
10082
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10083

    
10084
    elif self.op.remote_node is not None:
10085
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10086
      jobs = [
10087
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10088
                                        remote_node=self.op.remote_node,
10089
                                        disks=[],
10090
                                        mode=constants.REPLACE_DISK_CHG,
10091
                                        early_release=self.op.early_release)]
10092
        for instance_name in self.instance_names
10093
        ]
10094

    
10095
    else:
10096
      raise errors.ProgrammerError("No iallocator or remote node")
10097

    
10098
    return ResultWithJobs(jobs)
10099

    
10100

    
10101
def _SetOpEarlyRelease(early_release, op):
10102
  """Sets C{early_release} flag on opcodes if available.
10103

10104
  """
10105
  try:
10106
    op.early_release = early_release
10107
  except AttributeError:
10108
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10109

    
10110
  return op
10111

    
10112

    
10113
def _NodeEvacDest(use_nodes, group, nodes):
10114
  """Returns group or nodes depending on caller's choice.
10115

10116
  """
10117
  if use_nodes:
10118
    return utils.CommaJoin(nodes)
10119
  else:
10120
    return group
10121

    
10122

    
10123
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10124
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10125

10126
  Used by iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10127
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10128

10129
  @type lu: L{LogicalUnit}
10130
  @param lu: Logical unit instance
10131
  @type alloc_result: tuple/list
10132
  @param alloc_result: Result from iallocator
10133
  @type early_release: bool
10134
  @param early_release: Whether to release locks early if possible
10135
  @type use_nodes: bool
10136
  @param use_nodes: Whether to display node names instead of groups
10137

10138
  """
10139
  (moved, failed, jobs) = alloc_result
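  # Rough sketch of the expected shape (entries below are invented examples):
  #   moved  == [("inst1", "group1", ["node3"])]   # (instance, group, nodes)
  #   failed == [("inst2", "not enough memory")]   # (instance, reason)
  #   jobs   == [[op_dict1, op_dict2], ...]        # serialized opcodes per job
  # The serialized opcodes are re-created with opcodes.OpCode.LoadOpCode below.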
10140

    
10141
  if failed:
10142
    lu.LogWarning("Unable to evacuate instances %s",
10143
                  utils.CommaJoin("%s (%s)" % (name, reason)
10144
                                  for (name, reason) in failed))
10145

    
10146
  if moved:
10147
    lu.LogInfo("Instances to be moved: %s",
10148
               utils.CommaJoin("%s (to %s)" %
10149
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10150
                               for (name, group, nodes) in moved))
10151

    
10152
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10153
              map(opcodes.OpCode.LoadOpCode, ops))
10154
          for ops in jobs]
10155

    
10156

    
10157
class LUInstanceGrowDisk(LogicalUnit):
10158
  """Grow a disk of an instance.
10159

10160
  """
10161
  HPATH = "disk-grow"
10162
  HTYPE = constants.HTYPE_INSTANCE
10163
  REQ_BGL = False
10164

    
10165
  def ExpandNames(self):
10166
    self._ExpandAndLockInstance()
10167
    self.needed_locks[locking.LEVEL_NODE] = []
10168
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10169

    
10170
  def DeclareLocks(self, level):
10171
    if level == locking.LEVEL_NODE:
10172
      self._LockInstancesNodes()
10173

    
10174
  def BuildHooksEnv(self):
10175
    """Build hooks env.
10176

10177
    This runs on the master, the primary and all the secondaries.
10178

10179
    """
10180
    env = {
10181
      "DISK": self.op.disk,
10182
      "AMOUNT": self.op.amount,
10183
      }
10184
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10185
    return env
10186

    
10187
  def BuildHooksNodes(self):
10188
    """Build hooks nodes.
10189

10190
    """
10191
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10192
    return (nl, nl)
10193

    
10194
  def CheckPrereq(self):
10195
    """Check prerequisites.
10196

10197
    This checks that the instance is in the cluster.
10198

10199
    """
10200
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10201
    assert instance is not None, \
10202
      "Cannot retrieve locked instance %s" % self.op.instance_name
10203
    nodenames = list(instance.all_nodes)
10204
    for node in nodenames:
10205
      _CheckNodeOnline(self, node)
10206

    
10207
    self.instance = instance
10208

    
10209
    if instance.disk_template not in constants.DTS_GROWABLE:
10210
      raise errors.OpPrereqError("Instance's disk layout does not support"
10211
                                 " growing", errors.ECODE_INVAL)
10212

    
10213
    self.disk = instance.FindDisk(self.op.disk)
10214

    
10215
    if instance.disk_template not in (constants.DT_FILE,
10216
                                      constants.DT_SHARED_FILE):
10217
      # TODO: check the free disk space for file-based disks, once that is
10218
      # supported
10219
      _CheckNodesFreeDiskPerVG(self, nodenames,
10220
                               self.disk.ComputeGrowth(self.op.amount))
10221

    
10222
  def Exec(self, feedback_fn):
10223
    """Execute disk grow.
10224

10225
    """
10226
    instance = self.instance
10227
    disk = self.disk
10228

    
10229
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10230
    if not disks_ok:
10231
      raise errors.OpExecError("Cannot activate block device to grow")
10232

    
10233
    # First run all grow ops in dry-run mode
10234
    for node in instance.all_nodes:
10235
      self.cfg.SetDiskID(disk, node)
10236
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10237
      result.Raise("Grow request failed to node %s" % node)
10238

    
10239
    # We know that (as far as we can test) operations across different
10240
    # nodes will succeed, time to run it for real
10241
    for node in instance.all_nodes:
10242
      self.cfg.SetDiskID(disk, node)
10243
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10244
      result.Raise("Grow request failed to node %s" % node)
10245

    
10246
      # TODO: Rewrite code to work properly
10247
      # DRBD goes into sync mode for a short amount of time after executing the
10248
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10249
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10250
      # time is a work-around.
10251
      time.sleep(5)
10252

    
10253
    disk.RecordGrow(self.op.amount)
10254
    self.cfg.Update(instance, feedback_fn)
10255
    if self.op.wait_for_sync:
10256
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10257
      if disk_abort:
10258
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10259
                             " status; please check the instance")
10260
      if not instance.admin_up:
10261
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10262
    elif not instance.admin_up:
10263
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10264
                           " not supposed to be running because no wait for"
10265
                           " sync mode was requested")
10266

    
10267

    
10268
class LUInstanceQueryData(NoHooksLU):
10269
  """Query runtime instance data.
10270

10271
  """
10272
  REQ_BGL = False
10273

    
10274
  def ExpandNames(self):
10275
    self.needed_locks = {}
10276

    
10277
    # Use locking if requested or when non-static information is wanted
10278
    if not (self.op.static or self.op.use_locking):
10279
      self.LogWarning("Non-static data requested, locks need to be acquired")
10280
      self.op.use_locking = True
10281

    
10282
    if self.op.instances or not self.op.use_locking:
10283
      # Expand instance names right here
10284
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10285
    else:
10286
      # Will use acquired locks
10287
      self.wanted_names = None
10288

    
10289
    if self.op.use_locking:
10290
      self.share_locks = _ShareAll()
10291

    
10292
      if self.wanted_names is None:
10293
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10294
      else:
10295
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10296

    
10297
      self.needed_locks[locking.LEVEL_NODE] = []
10298
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10299

    
10300
  def DeclareLocks(self, level):
10301
    if self.op.use_locking and level == locking.LEVEL_NODE:
10302
      self._LockInstancesNodes()
10303

    
10304
  def CheckPrereq(self):
10305
    """Check prerequisites.
10306

10307
    This only checks the optional instance list against the existing names.
10308

10309
    """
10310
    if self.wanted_names is None:
10311
      assert self.op.use_locking, "Locking was not used"
10312
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
10313

    
10314
    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
10315
                             for name in self.wanted_names]
10316

    
10317
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10318
    """Returns the status of a block device
10319

10320
    """
10321
    if self.op.static or not node:
10322
      return None
10323

    
10324
    self.cfg.SetDiskID(dev, node)
10325

    
10326
    result = self.rpc.call_blockdev_find(node, dev)
10327
    if result.offline:
10328
      return None
10329

    
10330
    result.Raise("Can't compute disk status for %s" % instance_name)
10331

    
10332
    status = result.payload
10333
    if status is None:
10334
      return None
10335

    
10336
    return (status.dev_path, status.major, status.minor,
10337
            status.sync_percent, status.estimated_time,
10338
            status.is_degraded, status.ldisk_status)
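    # Rough shape of the tuple returned above, with invented example values:
    #   ("/dev/drbd0", 147, 0, 99.5, 12, False, constants.LDS_OKAY)
    # i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
    # ldisk_status); constants.LDS_OKAY is an assumed name for a healthy
    # local-disk status.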
10339

    
10340
  def _ComputeDiskStatus(self, instance, snode, dev):
10341
    """Compute block device status.
10342

10343
    """
10344
    if dev.dev_type in constants.LDS_DRBD:
10345
      # we change the snode then (otherwise we use the one passed in)
10346
      if dev.logical_id[0] == instance.primary_node:
10347
        snode = dev.logical_id[1]
10348
      else:
10349
        snode = dev.logical_id[0]
10350

    
10351
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10352
                                              instance.name, dev)
10353
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10354

    
10355
    if dev.children:
10356
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10357
                                        instance, snode),
10358
                         dev.children)
10359
    else:
10360
      dev_children = []
10361

    
10362
    return {
10363
      "iv_name": dev.iv_name,
10364
      "dev_type": dev.dev_type,
10365
      "logical_id": dev.logical_id,
10366
      "physical_id": dev.physical_id,
10367
      "pstatus": dev_pstatus,
10368
      "sstatus": dev_sstatus,
10369
      "children": dev_children,
10370
      "mode": dev.mode,
10371
      "size": dev.size,
10372
      }
10373

    
10374
  def Exec(self, feedback_fn):
10375
    """Gather and return data"""
10376
    result = {}
10377

    
10378
    cluster = self.cfg.GetClusterInfo()
10379

    
10380
    for instance in self.wanted_instances:
10381
      pnode = self.cfg.GetNodeInfo(instance.primary_node)
10382

    
10383
      if self.op.static or pnode.offline:
10384
        remote_state = None
10385
        if pnode.offline:
10386
          self.LogWarning("Primary node %s is marked offline, returning static"
10387
                          " information only for instance %s" %
10388
                          (pnode.name, instance.name))
10389
      else:
10390
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10391
                                                  instance.name,
10392
                                                  instance.hypervisor)
10393
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10394
        remote_info = remote_info.payload
10395
        if remote_info and "state" in remote_info:
10396
          remote_state = "up"
10397
        else:
10398
          remote_state = "down"
10399

    
10400
      if instance.admin_up:
10401
        config_state = "up"
10402
      else:
10403
        config_state = "down"
10404

    
10405
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10406
                  instance.disks)
10407

    
10408
      result[instance.name] = {
10409
        "name": instance.name,
10410
        "config_state": config_state,
10411
        "run_state": remote_state,
10412
        "pnode": instance.primary_node,
10413
        "snodes": instance.secondary_nodes,
10414
        "os": instance.os,
10415
        # this happens to be the same format used for hooks
10416
        "nics": _NICListToTuple(self, instance.nics),
10417
        "disk_template": instance.disk_template,
10418
        "disks": disks,
10419
        "hypervisor": instance.hypervisor,
10420
        "network_port": instance.network_port,
10421
        "hv_instance": instance.hvparams,
10422
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10423
        "be_instance": instance.beparams,
10424
        "be_actual": cluster.FillBE(instance),
10425
        "os_instance": instance.osparams,
10426
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10427
        "serial_no": instance.serial_no,
10428
        "mtime": instance.mtime,
10429
        "ctime": instance.ctime,
10430
        "uuid": instance.uuid,
10431
        }
10432

    
10433
    return result
10434

    
10435

    
10436
class LUInstanceSetParams(LogicalUnit):
10437
  """Modifies an instances's parameters.
10438

10439
  """
10440
  HPATH = "instance-modify"
10441
  HTYPE = constants.HTYPE_INSTANCE
10442
  REQ_BGL = False
10443

    
10444
  def CheckArguments(self):
10445
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10446
            self.op.hvparams or self.op.beparams or self.op.os_name):
10447
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10448

    
10449
    if self.op.hvparams:
10450
      _CheckGlobalHvParams(self.op.hvparams)
10451

    
10452
    # Disk validation
10453
    disk_addremove = 0
10454
    for disk_op, disk_dict in self.op.disks:
10455
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10456
      if disk_op == constants.DDM_REMOVE:
10457
        disk_addremove += 1
10458
        continue
10459
      elif disk_op == constants.DDM_ADD:
10460
        disk_addremove += 1
10461
      else:
10462
        if not isinstance(disk_op, int):
10463
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10464
        if not isinstance(disk_dict, dict):
10465
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10466
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10467

    
10468
      if disk_op == constants.DDM_ADD:
10469
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10470
        if mode not in constants.DISK_ACCESS_SET:
10471
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10472
                                     errors.ECODE_INVAL)
10473
        size = disk_dict.get(constants.IDISK_SIZE, None)
10474
        if size is None:
10475
          raise errors.OpPrereqError("Required disk parameter size missing",
10476
                                     errors.ECODE_INVAL)
10477
        try:
10478
          size = int(size)
10479
        except (TypeError, ValueError), err:
10480
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10481
                                     str(err), errors.ECODE_INVAL)
10482
        disk_dict[constants.IDISK_SIZE] = size
10483
      else:
10484
        # modification of disk
10485
        if constants.IDISK_SIZE in disk_dict:
10486
          raise errors.OpPrereqError("Disk size change not possible, use"
10487
                                     " grow-disk", errors.ECODE_INVAL)
10488

    
10489
    if disk_addremove > 1:
10490
      raise errors.OpPrereqError("Only one disk add or remove operation"
10491
                                 " supported at a time", errors.ECODE_INVAL)
10492

    
10493
    if self.op.disks and self.op.disk_template is not None:
10494
      raise errors.OpPrereqError("Disk template conversion and other disk"
10495
                                 " changes not supported at the same time",
10496
                                 errors.ECODE_INVAL)
10497

    
10498
    if (self.op.disk_template and
10499
        self.op.disk_template in constants.DTS_INT_MIRROR and
10500
        self.op.remote_node is None):
10501
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10502
                                 " one requires specifying a secondary node",
10503
                                 errors.ECODE_INVAL)
10504

    
10505
    # NIC validation
10506
    nic_addremove = 0
10507
    for nic_op, nic_dict in self.op.nics:
10508
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10509
      if nic_op == constants.DDM_REMOVE:
10510
        nic_addremove += 1
10511
        continue
10512
      elif nic_op == constants.DDM_ADD:
10513
        nic_addremove += 1
10514
      else:
10515
        if not isinstance(nic_op, int):
10516
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10517
        if not isinstance(nic_dict, dict):
10518
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10519
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10520

    
10521
      # nic_dict should be a dict
10522
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10523
      if nic_ip is not None:
10524
        if nic_ip.lower() == constants.VALUE_NONE:
10525
          nic_dict[constants.INIC_IP] = None
10526
        else:
10527
          if not netutils.IPAddress.IsValid(nic_ip):
10528
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10529
                                       errors.ECODE_INVAL)
10530

    
10531
      nic_bridge = nic_dict.get("bridge", None)
10532
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10533
      if nic_bridge and nic_link:
10534
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10535
                                   " at the same time", errors.ECODE_INVAL)
10536
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10537
        nic_dict["bridge"] = None
10538
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10539
        nic_dict[constants.INIC_LINK] = None
10540

    
10541
      if nic_op == constants.DDM_ADD:
10542
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10543
        if nic_mac is None:
10544
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10545

    
10546
      if constants.INIC_MAC in nic_dict:
10547
        nic_mac = nic_dict[constants.INIC_MAC]
10548
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10549
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10550

    
10551
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10552
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10553
                                     " modifying an existing nic",
10554
                                     errors.ECODE_INVAL)
10555

    
10556
    if nic_addremove > 1:
10557
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10558
                                 " supported at a time", errors.ECODE_INVAL)
10559

    
10560
  def ExpandNames(self):
10561
    self._ExpandAndLockInstance()
10562
    self.needed_locks[locking.LEVEL_NODE] = []
10563
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10564

    
10565
  def DeclareLocks(self, level):
10566
    if level == locking.LEVEL_NODE:
10567
      self._LockInstancesNodes()
10568
      if self.op.disk_template and self.op.remote_node:
10569
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10570
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10571

    
10572
  def BuildHooksEnv(self):
10573
    """Build hooks env.
10574

10575
    This runs on the master, primary and secondaries.
10576

10577
    """
10578
    args = dict()
10579
    if constants.BE_MEMORY in self.be_new:
10580
      args["memory"] = self.be_new[constants.BE_MEMORY]
10581
    if constants.BE_VCPUS in self.be_new:
10582
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10583
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10584
    # information at all.
10585
    if self.op.nics:
10586
      args["nics"] = []
10587
      nic_override = dict(self.op.nics)
10588
      for idx, nic in enumerate(self.instance.nics):
10589
        if idx in nic_override:
10590
          this_nic_override = nic_override[idx]
10591
        else:
10592
          this_nic_override = {}
10593
        if constants.INIC_IP in this_nic_override:
10594
          ip = this_nic_override[constants.INIC_IP]
10595
        else:
10596
          ip = nic.ip
10597
        if constants.INIC_MAC in this_nic_override:
10598
          mac = this_nic_override[constants.INIC_MAC]
10599
        else:
10600
          mac = nic.mac
10601
        if idx in self.nic_pnew:
10602
          nicparams = self.nic_pnew[idx]
10603
        else:
10604
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10605
        mode = nicparams[constants.NIC_MODE]
10606
        link = nicparams[constants.NIC_LINK]
10607
        args["nics"].append((ip, mac, mode, link))
10608
      if constants.DDM_ADD in nic_override:
10609
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10610
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10611
        nicparams = self.nic_pnew[constants.DDM_ADD]
10612
        mode = nicparams[constants.NIC_MODE]
10613
        link = nicparams[constants.NIC_LINK]
10614
        args["nics"].append((ip, mac, mode, link))
10615
      elif constants.DDM_REMOVE in nic_override:
10616
        del args["nics"][-1]
10617

    
10618
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10619
    if self.op.disk_template:
10620
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10621

    
10622
    return env
10623

    
10624
  def BuildHooksNodes(self):
10625
    """Build hooks nodes.
10626

10627
    """
10628
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10629
    return (nl, nl)
10630

    
10631
  def CheckPrereq(self):
10632
    """Check prerequisites.
10633

10634
    This only checks the instance list against the existing names.
10635

10636
    """
10637
    # checking the new params on the primary/secondary nodes
10638

    
10639
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10640
    cluster = self.cluster = self.cfg.GetClusterInfo()
10641
    assert self.instance is not None, \
10642
      "Cannot retrieve locked instance %s" % self.op.instance_name
10643
    pnode = instance.primary_node
10644
    nodelist = list(instance.all_nodes)
10645

    
10646
    # OS change
10647
    if self.op.os_name and not self.op.force:
10648
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10649
                      self.op.force_variant)
10650
      instance_os = self.op.os_name
10651
    else:
10652
      instance_os = instance.os
10653

    
10654
    if self.op.disk_template:
10655
      if instance.disk_template == self.op.disk_template:
10656
        raise errors.OpPrereqError("Instance already has disk template %s" %
10657
                                   instance.disk_template, errors.ECODE_INVAL)
10658

    
10659
      if (instance.disk_template,
10660
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10661
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10662
                                   " %s to %s" % (instance.disk_template,
10663
                                                  self.op.disk_template),
10664
                                   errors.ECODE_INVAL)
10665
      _CheckInstanceDown(self, instance, "cannot change disk template")
10666
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10667
        if self.op.remote_node == pnode:
10668
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10669
                                     " as the primary node of the instance" %
10670
                                     self.op.remote_node, errors.ECODE_STATE)
10671
        _CheckNodeOnline(self, self.op.remote_node)
10672
        _CheckNodeNotDrained(self, self.op.remote_node)
10673
        # FIXME: here we assume that the old disk template is DT_PLAIN
10674
        assert instance.disk_template == constants.DT_PLAIN
10675
        disks = [{constants.IDISK_SIZE: d.size,
10676
                  constants.IDISK_VG: d.logical_id[0]}
10677
                 for d in instance.disks]
10678
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10679
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10680

    
10681
    # hvparams processing
10682
    if self.op.hvparams:
10683
      hv_type = instance.hypervisor
10684
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10685
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10686
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10687

    
10688
      # local check
10689
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10690
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10691
      self.hv_new = hv_new # the new actual values
10692
      self.hv_inst = i_hvdict # the new dict (without defaults)
10693
    else:
10694
      self.hv_new = self.hv_inst = {}
10695

    
10696
    # beparams processing
10697
    if self.op.beparams:
10698
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10699
                                   use_none=True)
10700
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10701
      be_new = cluster.SimpleFillBE(i_bedict)
10702
      self.be_new = be_new # the new actual values
10703
      self.be_inst = i_bedict # the new dict (without defaults)
10704
    else:
10705
      self.be_new = self.be_inst = {}
10706
    be_old = cluster.FillBE(instance)
10707

    
10708
    # osparams processing
10709
    if self.op.osparams:
10710
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10711
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10712
      self.os_inst = i_osdict # the new dict (without defaults)
10713
    else:
10714
      self.os_inst = {}
10715

    
10716
    self.warn = []
10717

    
10718
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10719
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10720
      mem_check_list = [pnode]
10721
      if be_new[constants.BE_AUTO_BALANCE]:
10722
        # either auto_balance was just enabled or it was already set before
10723
        mem_check_list.extend(instance.secondary_nodes)
10724
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10725
                                                  instance.hypervisor)
10726
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10727
                                         instance.hypervisor)
10728
      pninfo = nodeinfo[pnode]
10729
      msg = pninfo.fail_msg
10730
      if msg:
10731
        # Assume the primary node is unreachable and go ahead
10732
        self.warn.append("Can't get info from primary node %s: %s" %
10733
                         (pnode, msg))
10734
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
10735
        self.warn.append("Node data from primary node %s doesn't contain"
10736
                         " free memory information" % pnode)
10737
      elif instance_info.fail_msg:
10738
        self.warn.append("Can't get instance runtime information: %s" %
10739
                        instance_info.fail_msg)
10740
      else:
10741
        if instance_info.payload:
10742
          current_mem = int(instance_info.payload["memory"])
10743
        else:
10744
          # Assume instance not running
10745
          # (there is a slight race condition here, but it's not very probable,
10746
          # and we have no other way to check)
10747
          current_mem = 0
10748
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10749
                    pninfo.payload["memory_free"])
10750
        if miss_mem > 0:
10751
          raise errors.OpPrereqError("This change will prevent the instance"
10752
                                     " from starting, due to %d MB of memory"
10753
                                     " missing on its primary node" % miss_mem,
10754
                                     errors.ECODE_NORES)
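        # Worked example with made-up numbers: raising BE_MEMORY to 4096 MiB
        # while the instance currently uses 1024 MiB and the primary node
        # reports 2048 MiB free gives miss_mem = 4096 - 1024 - 2048 = 1024 > 0,
        # which triggers the OpPrereqError above.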
10755

    
10756
      if be_new[constants.BE_AUTO_BALANCE]:
10757
        for node, nres in nodeinfo.items():
10758
          if node not in instance.secondary_nodes:
10759
            continue
10760
          nres.Raise("Can't get info from secondary node %s" % node,
10761
                     prereq=True, ecode=errors.ECODE_STATE)
10762
          if not isinstance(nres.payload.get("memory_free", None), int):
10763
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10764
                                       " memory information" % node,
10765
                                       errors.ECODE_STATE)
10766
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10767
            raise errors.OpPrereqError("This change will prevent the instance"
10768
                                       " from failover to its secondary node"
10769
                                       " %s, due to not enough memory" % node,
10770
                                       errors.ECODE_STATE)
10771

    
10772
    # NIC processing
10773
    self.nic_pnew = {}
10774
    self.nic_pinst = {}
10775
    for nic_op, nic_dict in self.op.nics:
10776
      if nic_op == constants.DDM_REMOVE:
10777
        if not instance.nics:
10778
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10779
                                     errors.ECODE_INVAL)
10780
        continue
10781
      if nic_op != constants.DDM_ADD:
10782
        # an existing nic
10783
        if not instance.nics:
10784
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10785
                                     " no NICs" % nic_op,
10786
                                     errors.ECODE_INVAL)
10787
        if nic_op < 0 or nic_op >= len(instance.nics):
10788
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10789
                                     " are 0 to %d" %
10790
                                     (nic_op, len(instance.nics) - 1),
10791
                                     errors.ECODE_INVAL)
10792
        old_nic_params = instance.nics[nic_op].nicparams
10793
        old_nic_ip = instance.nics[nic_op].ip
10794
      else:
10795
        old_nic_params = {}
10796
        old_nic_ip = None
10797

    
10798
      update_params_dict = dict([(key, nic_dict[key])
10799
                                 for key in constants.NICS_PARAMETERS
10800
                                 if key in nic_dict])
10801

    
10802
      if "bridge" in nic_dict:
10803
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10804

    
10805
      new_nic_params = _GetUpdatedParams(old_nic_params,
10806
                                         update_params_dict)
10807
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10808
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10809
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10810
      self.nic_pinst[nic_op] = new_nic_params
10811
      self.nic_pnew[nic_op] = new_filled_nic_params
10812
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10813

    
10814
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
10815
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10816
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10817
        if msg:
10818
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10819
          if self.op.force:
10820
            self.warn.append(msg)
10821
          else:
10822
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10823
      if new_nic_mode == constants.NIC_MODE_ROUTED:
10824
        if constants.INIC_IP in nic_dict:
10825
          nic_ip = nic_dict[constants.INIC_IP]
10826
        else:
10827
          nic_ip = old_nic_ip
10828
        if nic_ip is None:
10829
          raise errors.OpPrereqError("Cannot set the nic ip to None"
10830
                                     " on a routed nic", errors.ECODE_INVAL)
10831
      if constants.INIC_MAC in nic_dict:
10832
        nic_mac = nic_dict[constants.INIC_MAC]
10833
        if nic_mac is None:
10834
          raise errors.OpPrereqError("Cannot set the nic mac to None",
10835
                                     errors.ECODE_INVAL)
10836
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10837
          # otherwise generate the mac
10838
          nic_dict[constants.INIC_MAC] = \
10839
            self.cfg.GenerateMAC(self.proc.GetECId())
10840
        else:
10841
          # or validate/reserve the current one
10842
          try:
10843
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10844
          except errors.ReservationError:
10845
            raise errors.OpPrereqError("MAC address %s already in use"
10846
                                       " in cluster" % nic_mac,
10847
                                       errors.ECODE_NOTUNIQUE)
10848

    
10849
    # DISK processing
10850
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10851
      raise errors.OpPrereqError("Disk operations not supported for"
10852
                                 " diskless instances",
10853
                                 errors.ECODE_INVAL)
10854
    for disk_op, _ in self.op.disks:
10855
      if disk_op == constants.DDM_REMOVE:
10856
        if len(instance.disks) == 1:
10857
          raise errors.OpPrereqError("Cannot remove the last disk of"
10858
                                     " an instance", errors.ECODE_INVAL)
10859
        _CheckInstanceDown(self, instance, "cannot remove disks")
10860

    
10861
      if (disk_op == constants.DDM_ADD and
10862
          len(instance.disks) >= constants.MAX_DISKS):
10863
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10864
                                   " add more" % constants.MAX_DISKS,
10865
                                   errors.ECODE_STATE)
10866
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10867
        # an existing disk
10868
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)
10873

    
10874
    return
10875

    
10876
  def _ConvertPlainToDrbd(self, feedback_fn):
10877
    """Converts an instance from plain to drbd.
10878

10879
    """
10880
    feedback_fn("Converting template to drbd")
10881
    instance = self.instance
10882
    pnode = instance.primary_node
10883
    snode = self.op.remote_node
10884

    
10885
    # create a fake disk info for _GenerateDiskTemplate
10886
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10887
                  constants.IDISK_VG: d.logical_id[0]}
10888
                 for d in instance.disks]
10889
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10890
                                      instance.name, pnode, [snode],
10891
                                      disk_info, None, None, 0, feedback_fn)
10892
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
10895
    for disk in new_disks:
10896
      # unfortunately this is... not too nice
10897
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10898
                            info, True)
10899
      for child in disk.children:
10900
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
10901
    # at this stage, all new LVs have been created, we can rename the
10902
    # old ones
10903
    feedback_fn("Renaming original volumes...")
10904
    rename_list = [(o, n.children[0].logical_id)
10905
                   for (o, n) in zip(instance.disks, new_disks)]
10906
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
10907
    result.Raise("Failed to rename original LVs")
10908

    
10909
    feedback_fn("Initializing DRBD devices...")
10910
    # all child devices are in place, we can now create the DRBD devices
10911
    for disk in new_disks:
10912
      for node in [pnode, snode]:
10913
        f_create = node == pnode
10914
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10915

    
10916
    # at this point, the instance has been modified
10917
    instance.disk_template = constants.DT_DRBD8
10918
    instance.disks = new_disks
10919
    self.cfg.Update(instance, feedback_fn)
10920

    
10921
    # disks are created, waiting for sync
10922
    disk_abort = not _WaitForSync(self, instance,
10923
                                  oneshot=not self.op.wait_for_sync)
10924
    if disk_abort:
10925
      raise errors.OpExecError("There are some degraded disks for"
10926
                               " this instance, please cleanup manually")
10927

    
10928
  def _ConvertDrbdToPlain(self, feedback_fn):
10929
    """Converts an instance from drbd to plain.
10930

10931
    """
10932
    instance = self.instance
10933
    assert len(instance.secondary_nodes) == 1
10934
    pnode = instance.primary_node
10935
    snode = instance.secondary_nodes[0]
10936
    feedback_fn("Converting template to plain")
10937

    
10938
    old_disks = instance.disks
10939
    new_disks = [d.children[0] for d in old_disks]
10940

    
10941
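    # Sketch of the assumed on-disk layout: for DT_DRBD8 every disk object has
    # two children, so conceptually
    #   data_lv, meta_lv = drbd_disk.children   # hypothetical names
    # and keeping children[0] (the data LV) is what turns the template back
    # into plain LVs; the metadata LVs are removed further below.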
    # copy over size and mode
10942
    for parent, child in zip(old_disks, new_disks):
10943
      child.size = parent.size
10944
      child.mode = parent.mode
10945

    
10946
    # update instance structure
10947
    instance.disks = new_disks
10948
    instance.disk_template = constants.DT_PLAIN
10949
    self.cfg.Update(instance, feedback_fn)
10950

    
10951
    feedback_fn("Removing volumes on the secondary node...")
10952
    for disk in old_disks:
10953
      self.cfg.SetDiskID(disk, snode)
10954
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10955
      if msg:
10956
        self.LogWarning("Could not remove block device %s on node %s,"
10957
                        " continuing anyway: %s", disk.iv_name, snode, msg)
10958

    
10959
    feedback_fn("Removing unneeded volumes on the primary node...")
10960
    for idx, disk in enumerate(old_disks):
10961
      meta = disk.children[1]
10962
      self.cfg.SetDiskID(meta, pnode)
10963
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10964
      if msg:
10965
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
10966
                        " continuing anyway: %s", idx, pnode, msg)
10967

    
10968
  def Exec(self, feedback_fn):
10969
    """Modifies an instance.
10970

10971
    All parameters take effect only at the next restart of the instance.
10972

10973
    """
10974
    # Process here the warnings from CheckPrereq, as we don't have a
10975
    # feedback_fn there.
10976
    for warn in self.warn:
10977
      feedback_fn("WARNING: %s" % warn)
10978

    
10979
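    # Illustrative note: "result" collects (parameter, new_value) pairs that
    # are reported back to the caller, e.g. (hypothetical values)
    #   [("disk/1", "add:size=1024,mode=rw"), ("be/memory", 512)]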
    result = []
10980
    instance = self.instance
10981
    # disk changes
10982
    for disk_op, disk_dict in self.op.disks:
10983
      if disk_op == constants.DDM_REMOVE:
10984
        # remove the last disk
10985
        device = instance.disks.pop()
10986
        device_idx = len(instance.disks)
10987
        for node, disk in device.ComputeNodeTree(instance.primary_node):
10988
          self.cfg.SetDiskID(disk, node)
10989
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10990
          if msg:
10991
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
10992
                            " continuing anyway", device_idx, node, msg)
10993
        result.append(("disk/%d" % device_idx, "remove"))
10994
      elif disk_op == constants.DDM_ADD:
10995
        # add a new disk
10996
        if instance.disk_template in (constants.DT_FILE,
10997
                                        constants.DT_SHARED_FILE):
10998
          file_driver, file_path = instance.disks[0].logical_id
10999
          file_path = os.path.dirname(file_path)
11000
        else:
11001
          file_driver = file_path = None
11002
        disk_idx_base = len(instance.disks)
11003
        new_disk = _GenerateDiskTemplate(self,
11004
                                         instance.disk_template,
11005
                                         instance.name, instance.primary_node,
11006
                                         instance.secondary_nodes,
11007
                                         [disk_dict],
11008
                                         file_path,
11009
                                         file_driver,
11010
                                         disk_idx_base, feedback_fn)[0]
11011
        instance.disks.append(new_disk)
11012
        info = _GetInstanceInfoText(instance)
11013

    
11014
        logging.info("Creating volume %s for instance %s",
11015
                     new_disk.iv_name, instance.name)
11016
        # Note: this needs to be kept in sync with _CreateDisks
11017
        #HARDCODE
11018
        for node in instance.all_nodes:
11019
          f_create = node == instance.primary_node
11020
          try:
11021
            _CreateBlockDev(self, node, instance, new_disk,
11022
                            f_create, info, f_create)
11023
          except errors.OpExecError, err:
11024
            self.LogWarning("Failed to create volume %s (%s) on"
11025
                            " node %s: %s",
11026
                            new_disk.iv_name, new_disk, node, err)
11027
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11028
                       (new_disk.size, new_disk.mode)))
11029
      else:
11030
        # change a given disk
11031
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11032
        result.append(("disk.mode/%d" % disk_op,
11033
                       disk_dict[constants.IDISK_MODE]))
11034

    
11035
    if self.op.disk_template:
11036
      r_shut = _ShutdownInstanceDisks(self, instance)
11037
      if not r_shut:
11038
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11039
                                 " proceed with disk template conversion")
11040
      mode = (instance.disk_template, self.op.disk_template)
11041
      try:
11042
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11043
      except:
11044
        self.cfg.ReleaseDRBDMinors(instance.name)
11045
        raise
11046
      result.append(("disk_template", self.op.disk_template))
11047

    
11048
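    # Illustrative note: _DISK_CONVERSIONS (defined at the end of this class)
    # maps (old_template, new_template) pairs to conversion helpers, e.g.
    #   self._DISK_CONVERSIONS[(constants.DT_PLAIN, constants.DT_DRBD8)]
    # resolves to _ConvertPlainToDrbd, so only plain<->drbd8 conversions are
    # handled here.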
    # NIC changes
11049
    for nic_op, nic_dict in self.op.nics:
11050
      if nic_op == constants.DDM_REMOVE:
11051
        # remove the last nic
11052
        del instance.nics[-1]
11053
        result.append(("nic.%d" % len(instance.nics), "remove"))
11054
      elif nic_op == constants.DDM_ADD:
11055
        # mac and bridge should be set, by now
11056
        mac = nic_dict[constants.INIC_MAC]
11057
        ip = nic_dict.get(constants.INIC_IP, None)
11058
        nicparams = self.nic_pinst[constants.DDM_ADD]
11059
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11060
        instance.nics.append(new_nic)
11061
        result.append(("nic.%d" % (len(instance.nics) - 1),
11062
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11063
                       (new_nic.mac, new_nic.ip,
11064
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11065
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11066
                       )))
11067
      else:
11068
        for key in (constants.INIC_MAC, constants.INIC_IP):
11069
          if key in nic_dict:
11070
            setattr(instance.nics[nic_op], key, nic_dict[key])
11071
        if nic_op in self.nic_pinst:
11072
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11073
        for key, val in nic_dict.iteritems():
11074
          result.append(("nic.%s/%d" % (key, nic_op), val))
11075

    
11076
    # hvparams changes
11077
    if self.op.hvparams:
11078
      instance.hvparams = self.hv_inst
11079
      for key, val in self.op.hvparams.iteritems():
11080
        result.append(("hv/%s" % key, val))
11081

    
11082
    # beparams changes
11083
    if self.op.beparams:
11084
      instance.beparams = self.be_inst
11085
      for key, val in self.op.beparams.iteritems():
11086
        result.append(("be/%s" % key, val))
11087

    
11088
    # OS change
11089
    if self.op.os_name:
11090
      instance.os = self.op.os_name
11091

    
11092
    # osparams changes
11093
    if self.op.osparams:
11094
      instance.osparams = self.os_inst
11095
      for key, val in self.op.osparams.iteritems():
11096
        result.append(("os/%s" % key, val))
11097

    
11098
    self.cfg.Update(instance, feedback_fn)
11099

    
11100
    return result
11101

    
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result

    
11143

    
11144
class LUBackupPrepare(NoHooksLU):
11145
  """Prepares an instance for an export and returns useful information.
11146

11147
  """
11148
  REQ_BGL = False
11149

    
11150
  def ExpandNames(self):
11151
    self._ExpandAndLockInstance()
11152

    
11153
  def CheckPrereq(self):
11154
    """Check prerequisites.
11155

11156
    """
11157
    instance_name = self.op.instance_name
11158

    
11159
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11160
    assert self.instance is not None, \
11161
          "Cannot retrieve locked instance %s" % self.op.instance_name
11162
    _CheckNodeOnline(self, self.instance.primary_node)
11163

    
11164
    self._cds = _GetClusterDomainSecret()
11165

    
11166
  def Exec(self, feedback_fn):
11167
    """Prepares an instance for an export.
11168

11169
    """
11170
    instance = self.instance
11171

    
11172
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11173
      salt = utils.GenerateSecret(8)
11174

    
11175
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11176
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11177
                                              constants.RIE_CERT_VALIDITY)
11178
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11179

    
11180
      (name, cert_pem) = result.payload
11181

    
11182
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11183
                                             cert_pem)
11184

    
11185
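      # Illustrative note: the values returned below are meant to be verified
      # by the destination cluster against the shared cluster domain secret,
      # e.g. the HMAC-protected key name is checked in
      # LUBackupExport.CheckPrereq via
      #   utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt)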
      return {
11186
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11187
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11188
                          salt),
11189
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11190
        }
11191

    
11192
    return None
11193

    
11194

    
11195
class LUBackupExport(LogicalUnit):
11196
  """Export an instance to an image in the cluster.
11197

11198
  """
11199
  HPATH = "instance-export"
11200
  HTYPE = constants.HTYPE_INSTANCE
11201
  REQ_BGL = False
11202

    
11203
  def CheckArguments(self):
11204
    """Check the arguments.
11205

11206
    """
11207
    self.x509_key_name = self.op.x509_key_name
11208
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11209

    
11210
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11211
      if not self.x509_key_name:
11212
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11213
                                   errors.ECODE_INVAL)
11214

    
11215
      if not self.dest_x509_ca_pem:
11216
        raise errors.OpPrereqError("Missing destination X509 CA",
11217
                                   errors.ECODE_INVAL)
11218

    
11219
  def ExpandNames(self):
11220
    self._ExpandAndLockInstance()
11221

    
11222
    # Lock all nodes for local exports
11223
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11224
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11233

    
11234
  def DeclareLocks(self, level):
11235
    """Last minute lock declaration."""
11236
    # All nodes are locked anyway, so nothing to do here.
11237

    
11238
  def BuildHooksEnv(self):
11239
    """Build hooks env.
11240

11241
    This will run on the master, primary node and target node.
11242

11243
    """
11244
    env = {
11245
      "EXPORT_MODE": self.op.mode,
11246
      "EXPORT_NODE": self.op.target_node,
11247
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11248
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11249
      # TODO: Generic function for boolean env variables
11250
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11251
      }
11252

    
11253
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11254

    
11255
    return env
11256

    
11257
  def BuildHooksNodes(self):
11258
    """Build hooks nodes.
11259

11260
    """
11261
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11262

    
11263
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11264
      nl.append(self.op.target_node)
11265

    
11266
    return (nl, nl)
11267

    
11268
  def CheckPrereq(self):
11269
    """Check prerequisites.
11270

11271
    This checks that the instance and node names are valid.
11272

11273
    """
11274
    instance_name = self.op.instance_name
11275

    
11276
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11277
    assert self.instance is not None, \
11278
          "Cannot retrieve locked instance %s" % self.op.instance_name
11279
    _CheckNodeOnline(self, self.instance.primary_node)
11280

    
11281
    if (self.op.remove_instance and self.instance.admin_up and
11282
        not self.op.shutdown):
11283
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11284
                                 " down before")
11285

    
11286
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11287
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11288
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11289
      assert self.dst_node is not None
11290

    
11291
      _CheckNodeOnline(self, self.dst_node.name)
11292
      _CheckNodeNotDrained(self, self.dst_node.name)
11293

    
11294
      self._cds = None
11295
      self.dest_disk_info = None
11296
      self.dest_x509_ca = None
11297

    
11298
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11299
      self.dst_node = None
11300

    
11301
      if len(self.op.target_node) != len(self.instance.disks):
11302
        raise errors.OpPrereqError(("Received destination information for %s"
11303
                                    " disks, but instance %s has %s disks") %
11304
                                   (len(self.op.target_node), instance_name,
11305
                                    len(self.instance.disks)),
11306
                                   errors.ECODE_INVAL)
11307

    
11308
      cds = _GetClusterDomainSecret()
11309

    
11310
      # Check X509 key name
11311
      try:
11312
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11313
      except (TypeError, ValueError), err:
11314
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11315

    
11316
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11317
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11318
                                   errors.ECODE_INVAL)
11319

    
11320
      # Load and verify CA
11321
      try:
11322
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11323
      except OpenSSL.crypto.Error, err:
11324
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11325
                                   (err, ), errors.ECODE_INVAL)
11326

    
11327
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11328
      if errcode is not None:
11329
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11330
                                   (msg, ), errors.ECODE_INVAL)
11331

    
11332
      self.dest_x509_ca = cert
11333

    
11334
      # Verify target information
11335
      disk_info = []
11336
      for idx, disk_data in enumerate(self.op.target_node):
11337
        try:
11338
          (host, port, magic) = \
11339
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11340
        except errors.GenericError, err:
11341
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11342
                                     (idx, err), errors.ECODE_INVAL)
11343

    
11344
        disk_info.append((host, port, magic))
11345

    
11346
      assert len(disk_info) == len(self.op.target_node)
11347
      self.dest_disk_info = disk_info
11348

    
11349
    else:
11350
      raise errors.ProgrammerError("Unhandled export mode %r" %
11351
                                   self.op.mode)
11352

    
11353
    # instance disk type verification
11354
    # TODO: Implement export support for file-based disks
11355
    for disk in self.instance.disks:
11356
      if disk.dev_type == constants.LD_FILE:
11357
        raise errors.OpPrereqError("Export not supported for instances with"
11358
                                   " file-based disks", errors.ECODE_INVAL)
11359

    
11360
  def _CleanupExports(self, feedback_fn):
11361
    """Removes exports of current instance from all other nodes.
11362

11363
    If an instance in a cluster with nodes A..D was exported to node C, its
11364
    exports will be removed from the nodes A, B and D.
11365

11366
    """
11367
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11368

    
11369
    nodelist = self.cfg.GetNodeList()
11370
    nodelist.remove(self.dst_node.name)
11371

    
11372
    # on one-node clusters nodelist will be empty after the removal; if we
    # proceeded, the backup would be removed because OpBackupQuery substitutes
    # an empty list with the full cluster node list.
11375
    iname = self.instance.name
11376
    if nodelist:
11377
      feedback_fn("Removing old exports for instance %s" % iname)
11378
      exportlist = self.rpc.call_export_list(nodelist)
11379
      for node in exportlist:
11380
        if exportlist[node].fail_msg:
11381
          continue
11382
        if iname in exportlist[node].payload:
11383
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11384
          if msg:
11385
            self.LogWarning("Could not remove older export for instance %s"
11386
                            " on node %s: %s", iname, node, msg)
11387

    
11388
  def Exec(self, feedback_fn):
11389
    """Export an instance to an image in the cluster.
11390

11391
    """
11392
    assert self.op.mode in constants.EXPORT_MODES
11393

    
11394
    instance = self.instance
11395
    src_node = instance.primary_node
11396

    
11397
    if self.op.shutdown:
11398
      # shutdown the instance, but not the disks
11399
      feedback_fn("Shutting down instance %s" % instance.name)
11400
      result = self.rpc.call_instance_shutdown(src_node, instance,
11401
                                               self.op.shutdown_timeout)
11402
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11403
      result.Raise("Could not shutdown instance %s on"
11404
                   " node %s" % (instance.name, src_node))
11405

    
11406
    # set the disks ID correctly since call_instance_start needs the
11407
    # correct drbd minor to create the symlinks
11408
    for disk in instance.disks:
11409
      self.cfg.SetDiskID(disk, src_node)
11410

    
11411
    activate_disks = (not instance.admin_up)
11412

    
11413
    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
11416
      _StartInstanceDisks(self, instance, None)
11417

    
11418
    try:
11419
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11420
                                                     instance)
11421

    
11422
      helper.CreateSnapshots()
11423
      try:
11424
        if (self.op.shutdown and instance.admin_up and
11425
            not self.op.remove_instance):
11426
          assert not activate_disks
11427
          feedback_fn("Starting instance %s" % instance.name)
11428
          result = self.rpc.call_instance_start(src_node, instance,
11429
                                                None, None, False)
11430
          msg = result.fail_msg
11431
          if msg:
11432
            feedback_fn("Failed to start instance: %s" % msg)
11433
            _ShutdownInstanceDisks(self, instance)
11434
            raise errors.OpExecError("Could not start instance: %s" % msg)
11435

    
11436
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11437
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11438
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11439
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11440
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11441

    
11442
          (key_name, _, _) = self.x509_key_name
11443

    
11444
          dest_ca_pem = \
11445
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11446
                                            self.dest_x509_ca)
11447

    
11448
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11449
                                                     key_name, dest_ca_pem,
11450
                                                     timeouts)
11451
      finally:
11452
        helper.Cleanup()
11453

    
11454
      # Check for backwards compatibility
11455
      assert len(dresults) == len(instance.disks)
11456
      assert compat.all(isinstance(i, bool) for i in dresults), \
11457
             "Not all results are boolean: %r" % dresults
11458

    
11459
    finally:
11460
      if activate_disks:
11461
        feedback_fn("Deactivating disks for %s" % instance.name)
11462
        _ShutdownInstanceDisks(self, instance)
11463

    
11464
    if not (compat.all(dresults) and fin_resu):
11465
      failures = []
11466
      if not fin_resu:
11467
        failures.append("export finalization")
11468
      if not compat.all(dresults):
11469
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11470
                               if not dsk)
11471
        failures.append("disk export: disk(s) %s" % fdsk)
11472

    
11473
      raise errors.OpExecError("Export failed, errors in %s" %
11474
                               utils.CommaJoin(failures))
11475

    
11476
    # At this point, the export was successful, we can cleanup/finish
11477

    
11478
    # Remove instance if requested
11479
    if self.op.remove_instance:
11480
      feedback_fn("Removing instance %s" % instance.name)
11481
      _RemoveInstance(self, feedback_fn, instance,
11482
                      self.op.ignore_remove_failures)
11483

    
11484
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11485
      self._CleanupExports(feedback_fn)
11486

    
11487
    return fin_resu, dresults
11488

    
11489

    
11490
class LUBackupRemove(NoHooksLU):
11491
  """Remove exports related to the named instance.
11492

11493
  """
11494
  REQ_BGL = False
11495

    
11496
  def ExpandNames(self):
11497
    self.needed_locks = {}
11498
    # We need all nodes to be locked in order for RemoveExport to work, but we
11499
    # don't need to lock the instance itself, as nothing will happen to it (and
11500
    # we can remove exports also for a removed instance)
11501
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11502

    
11503
  def Exec(self, feedback_fn):
11504
    """Remove any export.
11505

11506
    """
11507
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11508
    # If the instance was not found we'll try with the name that was passed in.
11509
    # This will only work if it was an FQDN, though.
11510
    fqdn_warn = False
11511
    if not instance_name:
11512
      fqdn_warn = True
11513
      instance_name = self.op.instance_name
11514

    
11515
    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
11516
    exportlist = self.rpc.call_export_list(locked_nodes)
11517
    found = False
11518
    for node in exportlist:
11519
      msg = exportlist[node].fail_msg
11520
      if msg:
11521
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11522
        continue
11523
      if instance_name in exportlist[node].payload:
11524
        found = True
11525
        result = self.rpc.call_export_remove(node, instance_name)
11526
        msg = result.fail_msg
11527
        if msg:
11528
          logging.error("Could not remove export for instance %s"
11529
                        " on node %s: %s", instance_name, node, msg)
11530

    
11531
    if fqdn_warn and not found:
11532
      feedback_fn("Export not found. If trying to remove an export belonging"
11533
                  " to a deleted instance please use its Fully Qualified"
11534
                  " Domain Name.")
11535

    
11536

    
11537
class LUGroupAdd(LogicalUnit):
11538
  """Logical unit for creating node groups.
11539

11540
  """
11541
  HPATH = "group-add"
11542
  HTYPE = constants.HTYPE_GROUP
11543
  REQ_BGL = False
11544

    
11545
  def ExpandNames(self):
11546
    # We need the new group's UUID here so that we can create and acquire the
11547
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11548
    # that it should not check whether the UUID exists in the configuration.
11549
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11550
    self.needed_locks = {}
11551
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11552

    
11553
  def CheckPrereq(self):
11554
    """Check prerequisites.
11555

11556
    This checks that the given group name is not an existing node group
11557
    already.
11558

11559
    """
11560
    try:
11561
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11562
    except errors.OpPrereqError:
11563
      pass
11564
    else:
11565
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11566
                                 " node group (UUID: %s)" %
11567
                                 (self.op.group_name, existing_uuid),
11568
                                 errors.ECODE_EXISTS)
11569

    
11570
    if self.op.ndparams:
11571
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11572

    
11573
  def BuildHooksEnv(self):
11574
    """Build hooks env.
11575

11576
    """
11577
    return {
11578
      "GROUP_NAME": self.op.group_name,
11579
      }
11580

    
11581
  def BuildHooksNodes(self):
11582
    """Build hooks nodes.
11583

11584
    """
11585
    mn = self.cfg.GetMasterNode()
11586
    return ([mn], [mn])
11587

    
11588
  def Exec(self, feedback_fn):
11589
    """Add the node group to the cluster.
11590

11591
    """
11592
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11593
                                  uuid=self.group_uuid,
11594
                                  alloc_policy=self.op.alloc_policy,
11595
                                  ndparams=self.op.ndparams)
11596

    
11597
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11598
    del self.remove_locks[locking.LEVEL_NODEGROUP]
11599

    
11600

    
11601
class LUGroupAssignNodes(NoHooksLU):
11602
  """Logical unit for assigning nodes to groups.
11603

11604
  """
11605
  REQ_BGL = False
11606

    
11607
  def ExpandNames(self):
11608
    # These raise errors.OpPrereqError on their own:
11609
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11610
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11611

    
11612
    # We want to lock all the affected nodes and groups. We have readily
11613
    # available the list of nodes, and the *destination* group. To gather the
11614
    # list of "source" groups, we need to fetch node information later on.
11615
    self.needed_locks = {
11616
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11617
      locking.LEVEL_NODE: self.op.nodes,
11618
      }
11619

    
11620
  def DeclareLocks(self, level):
11621
    if level == locking.LEVEL_NODEGROUP:
11622
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11623

    
11624
      # Try to get all affected nodes' groups without having the group or node
11625
      # lock yet. Needs verification later in the code flow.
11626
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11627

    
11628
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11629
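      # Illustrative note: if the nodes in self.op.nodes currently sit in
      # groups "g1" and "g2" (hypothetical UUIDs), this adds {g1, g2} to the
      # node-group locks on top of the destination group's UUID; CheckPrereq
      # recomputes the same set once the locks are actually held and aborts if
      # it changed.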

    
11630
  def CheckPrereq(self):
11631
    """Check prerequisites.
11632

11633
    """
11634
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11635
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11636
            frozenset(self.op.nodes))
11637

    
11638
    expected_locks = (set([self.group_uuid]) |
11639
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11640
    actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11641
    if actual_locks != expected_locks:
11642
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11643
                               " current groups are '%s', used to be '%s'" %
11644
                               (utils.CommaJoin(expected_locks),
11645
                                utils.CommaJoin(actual_locks)))
11646

    
11647
    self.node_data = self.cfg.GetAllNodesInfo()
11648
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11649
    instance_data = self.cfg.GetAllInstancesInfo()
11650

    
11651
    if self.group is None:
11652
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11653
                               (self.op.group_name, self.group_uuid))
11654

    
11655
    (new_splits, previous_splits) = \
11656
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11657
                                             for node in self.op.nodes],
11658
                                            self.node_data, instance_data)
11659

    
11660
    if new_splits:
11661
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11662

    
11663
      if not self.op.force:
11664
        raise errors.OpExecError("The following instances get split by this"
11665
                                 " change and --force was not given: %s" %
11666
                                 fmt_new_splits)
11667
      else:
11668
        self.LogWarning("This operation will split the following instances: %s",
11669
                        fmt_new_splits)
11670

    
11671
        if previous_splits:
11672
          self.LogWarning("In addition, these already-split instances continue"
11673
                          " to be split across groups: %s",
11674
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
11675

    
11676
  def Exec(self, feedback_fn):
11677
    """Assign nodes to a new group.
11678

11679
    """
11680
    for node in self.op.nodes:
11681
      self.node_data[node].group = self.group_uuid
11682

    
11683
    # FIXME: Depends on side-effects of modifying the result of
11684
    # C{cfg.GetAllNodesInfo}
11685

    
11686
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11687

    
11688
  @staticmethod
11689
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11690
    """Check for split instances after a node assignment.
11691

11692
    This method considers a series of node assignments as an atomic operation,
11693
    and returns information about split instances after applying the set of
11694
    changes.
11695

11696
    In particular, it returns information about newly split instances, and
11697
    instances that were already split, and remain so after the change.
11698

11699
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11700
    considered.
11701

11702
    @type changes: list of (node_name, new_group_uuid) pairs.
11703
    @param changes: list of node assignments to consider.
11704
    @param node_data: a dict with data for all nodes
11705
    @param instance_data: a dict with all instances to consider
11706
    @rtype: a two-tuple
11707
    @return: a list of instances that were previously okay and end up split as
      a consequence of this change, and a list of instances that were
      previously split and that this change does not fix.
11710

11711
    """
11712
    changed_nodes = dict((node, group) for node, group in changes
11713
                         if node_data[node].group != group)
11714

    
11715
    all_split_instances = set()
11716
    previously_split_instances = set()
11717

    
11718
    def InstanceNodes(instance):
11719
      return [instance.primary_node] + list(instance.secondary_nodes)
11720

    
11721
    for inst in instance_data.values():
11722
      if inst.disk_template not in constants.DTS_INT_MIRROR:
11723
        continue
11724

    
11725
      instance_nodes = InstanceNodes(inst)
11726

    
11727
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
11728
        previously_split_instances.add(inst.name)
11729

    
11730
      if len(set(changed_nodes.get(node, node_data[node].group)
11731
                 for node in instance_nodes)) > 1:
11732
        all_split_instances.add(inst.name)
11733

    
11734
    return (list(all_split_instances - previously_split_instances),
11735
            list(previously_split_instances & all_split_instances))
11736

    
11737

    
11738
class _GroupQuery(_QueryBase):
11739
  FIELDS = query.GROUP_FIELDS
11740

    
11741
  def ExpandNames(self, lu):
11742
    lu.needed_locks = {}
11743

    
11744
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11745
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11746

    
11747
    if not self.names:
11748
      self.wanted = [name_to_uuid[name]
11749
                     for name in utils.NiceSort(name_to_uuid.keys())]
11750
    else:
11751
      # Accept names to be either names or UUIDs.
11752
      missing = []
11753
      self.wanted = []
11754
      all_uuid = frozenset(self._all_groups.keys())
11755

    
11756
      for name in self.names:
11757
        if name in all_uuid:
11758
          self.wanted.append(name)
11759
        elif name in name_to_uuid:
11760
          self.wanted.append(name_to_uuid[name])
11761
        else:
11762
          missing.append(name)
11763

    
11764
      if missing:
11765
        raise errors.OpPrereqError("Some groups do not exist: %s" %
11766
                                   utils.CommaJoin(missing),
11767
                                   errors.ECODE_NOENT)
11768

    
11769
  def DeclareLocks(self, lu, level):
11770
    pass
11771

    
11772
  def _GetQueryData(self, lu):
11773
    """Computes the list of node groups and their attributes.
11774

11775
    """
11776
    do_nodes = query.GQ_NODE in self.requested_data
11777
    do_instances = query.GQ_INST in self.requested_data
11778

    
11779
    group_to_nodes = None
11780
    group_to_instances = None
11781

    
11782
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11783
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11784
    # latter GetAllInstancesInfo() is not enough, for we have to go through
11785
    # instance->node. Hence, we will need to process nodes even if we only need
11786
    # instance information.
11787
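    # For illustration (assumed data): group_to_nodes and group_to_instances
    # end up shaped like
    #   {"group-uuid-1": ["node1", "node2"], ...}
    #   {"group-uuid-1": ["inst1"], ...}
    # respectively, keyed by the UUIDs listed in self.wanted.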
    if do_nodes or do_instances:
11788
      all_nodes = lu.cfg.GetAllNodesInfo()
11789
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11790
      node_to_group = {}
11791

    
11792
      for node in all_nodes.values():
11793
        if node.group in group_to_nodes:
11794
          group_to_nodes[node.group].append(node.name)
11795
          node_to_group[node.name] = node.group
11796

    
11797
      if do_instances:
11798
        all_instances = lu.cfg.GetAllInstancesInfo()
11799
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
11800

    
11801
        for instance in all_instances.values():
11802
          node = instance.primary_node
11803
          if node in node_to_group:
11804
            group_to_instances[node_to_group[node]].append(instance.name)
11805

    
11806
        if not do_nodes:
11807
          # Do not pass on node information if it was not requested.
11808
          group_to_nodes = None
11809

    
11810
    return query.GroupQueryData([self._all_groups[uuid]
11811
                                 for uuid in self.wanted],
11812
                                group_to_nodes, group_to_instances)
11813

    
11814

    
11815
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)
11830

    
11831

    
11832
class LUGroupSetParams(LogicalUnit):
11833
  """Modifies the parameters of a node group.
11834

11835
  """
11836
  HPATH = "group-modify"
11837
  HTYPE = constants.HTYPE_GROUP
11838
  REQ_BGL = False
11839

    
11840
  def CheckArguments(self):
11841
    all_changes = [
11842
      self.op.ndparams,
11843
      self.op.alloc_policy,
11844
      ]
11845

    
11846
    if all_changes.count(None) == len(all_changes):
11847
      raise errors.OpPrereqError("Please pass at least one modification",
11848
                                 errors.ECODE_INVAL)
11849

    
11850
  def ExpandNames(self):
11851
    # This raises errors.OpPrereqError on its own:
11852
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11853

    
11854
    self.needed_locks = {
11855
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11856
      }
11857

    
11858
  def CheckPrereq(self):
11859
    """Check prerequisites.
11860

11861
    """
11862
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11863

    
11864
    if self.group is None:
11865
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11866
                               (self.op.group_name, self.group_uuid))
11867

    
11868
    if self.op.ndparams:
11869
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11870
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11871
      self.new_ndparams = new_ndparams
11872

    
11873
  def BuildHooksEnv(self):
11874
    """Build hooks env.
11875

11876
    """
11877
    return {
11878
      "GROUP_NAME": self.op.group_name,
11879
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
11880
      }
11881

    
11882
  def BuildHooksNodes(self):
11883
    """Build hooks nodes.
11884

11885
    """
11886
    mn = self.cfg.GetMasterNode()
11887
    return ([mn], [mn])
11888

    
11889
  def Exec(self, feedback_fn):
11890
    """Modifies the node group.
11891

11892
    """
11893
    result = []
11894

    
11895
    if self.op.ndparams:
11896
      self.group.ndparams = self.new_ndparams
11897
      result.append(("ndparams", str(self.group.ndparams)))
11898

    
11899
    if self.op.alloc_policy:
11900
      self.group.alloc_policy = self.op.alloc_policy
11901

    
11902
    self.cfg.Update(self.group, feedback_fn)
11903
    return result
11904

    
11905

    
11906

    
11907
class LUGroupRemove(LogicalUnit):
11908
  HPATH = "group-remove"
11909
  HTYPE = constants.HTYPE_GROUP
11910
  REQ_BGL = False
11911

    
11912
  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11915
    self.needed_locks = {
11916
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11917
      }
11918

    
11919
  def CheckPrereq(self):
11920
    """Check prerequisites.
11921

11922
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
11925

11926
    """
11927
    # Verify that the group is empty.
11928
    group_nodes = [node.name
11929
                   for node in self.cfg.GetAllNodesInfo().values()
11930
                   if node.group == self.group_uuid]
11931

    
11932
    if group_nodes:
11933
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
11934
                                 " nodes: %s" %
11935
                                 (self.op.group_name,
11936
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
11937
                                 errors.ECODE_STATE)
11938

    
11939
    # Verify the cluster would not be left group-less.
11940
    if len(self.cfg.GetNodeGroupList()) == 1:
11941
      raise errors.OpPrereqError("Group '%s' is the only group,"
11942
                                 " cannot be removed" %
11943
                                 self.op.group_name,
11944
                                 errors.ECODE_STATE)
11945

    
11946
  def BuildHooksEnv(self):
11947
    """Build hooks env.
11948

11949
    """
11950
    return {
11951
      "GROUP_NAME": self.op.group_name,
11952
      }
11953

    
11954
  def BuildHooksNodes(self):
11955
    """Build hooks nodes.
11956

11957
    """
11958
    mn = self.cfg.GetMasterNode()
11959
    return ([mn], [mn])
11960

    
11961
  def Exec(self, feedback_fn):
11962
    """Remove the node group.
11963

11964
    """
11965
    try:
11966
      self.cfg.RemoveNodeGroup(self.group_uuid)
11967
    except errors.ConfigurationError:
11968
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11969
                               (self.op.group_name, self.group_uuid))
11970

    
11971
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11972

    
11973

    
11974
class LUGroupRename(LogicalUnit):
11975
  HPATH = "group-rename"
11976
  HTYPE = constants.HTYPE_GROUP
11977
  REQ_BGL = False
11978

    
11979
  def ExpandNames(self):
11980
    # This raises errors.OpPrereqError on its own:
11981
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11982

    
11983
    self.needed_locks = {
11984
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11985
      }
11986

    
11987
  def CheckPrereq(self):
11988
    """Check prerequisites.
11989

11990
    Ensures requested new name is not yet used.
11991

11992
    """
11993
    try:
11994
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11995
    except errors.OpPrereqError:
11996
      pass
11997
    else:
11998
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11999
                                 " node group (UUID: %s)" %
12000
                                 (self.op.new_name, new_name_uuid),
12001
                                 errors.ECODE_EXISTS)
12002

    
12003
  def BuildHooksEnv(self):
12004
    """Build hooks env.
12005

12006
    """
12007
    return {
12008
      "OLD_NAME": self.op.group_name,
12009
      "NEW_NAME": self.op.new_name,
12010
      }
12011

    
12012
  def BuildHooksNodes(self):
12013
    """Build hooks nodes.
12014

12015
    """
12016
    mn = self.cfg.GetMasterNode()
12017

    
12018
    all_nodes = self.cfg.GetAllNodesInfo()
12019
    all_nodes.pop(mn, None)
12020

    
12021
    run_nodes = [mn]
12022
    run_nodes.extend(node.name for node in all_nodes.values()
12023
                     if node.group == self.group_uuid)
12024

    
12025
    return (run_nodes, run_nodes)
12026

    
12027
  def Exec(self, feedback_fn):
12028
    """Rename the node group.
12029

12030
    """
12031
    group = self.cfg.GetNodeGroup(self.group_uuid)
12032

    
12033
    if group is None:
12034
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12035
                               (self.op.group_name, self.group_uuid))
12036

    
12037
    group.name = self.op.new_name
12038
    self.cfg.Update(group, feedback_fn)
12039

    
12040
    return self.op.new_name
12041

    
12042

    
12043
class LUGroupEvacuate(LogicalUnit):
12044
  HPATH = "group-evacuate"
12045
  HTYPE = constants.HTYPE_GROUP
12046
  REQ_BGL = False
12047

    
12048
  def ExpandNames(self):
12049
    # This raises errors.OpPrereqError on its own:
12050
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12051

    
12052
    if self.op.target_groups:
12053
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12054
                                  self.op.target_groups)
12055
    else:
12056
      self.req_target_uuids = []
12057

    
12058
    if self.group_uuid in self.req_target_uuids:
12059
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12060
                                 " as a target group (targets are %s)" %
12061
                                 (self.group_uuid,
12062
                                  utils.CommaJoin(self.req_target_uuids)),
12063
                                 errors.ECODE_INVAL)
12064

    
12065
    if not self.op.iallocator:
12066
      # Use default iallocator
12067
      self.op.iallocator = self.cfg.GetDefaultIAllocator()

    if not self.op.iallocator:
      raise errors.OpPrereqError("No iallocator was specified, neither in the"
                                 " opcode nor as a cluster-wide default",
                                 errors.ECODE_INVAL)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.glm.list_owned(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated
      assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
    if owned_instances != wanted_instances:
      raise errors.OpPrereqError("Instances in node group to be evacuated (%s)"
                                 " changed since locks were acquired, wanted"
                                 " %s, have %s; retry the operation" %
                                 (self.group_uuid,
                                  utils.CommaJoin(wanted_instances),
                                  utils.CommaJoin(owned_instances)),
                                 errors.ECODE_STATE)

    # Get instance information
    self.instances = dict((name, self.cfg.GetInstanceInfo(name))
                          for name in owned_instances)

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
      if not owned_groups.issuperset(inst_groups):
        raise errors.OpPrereqError("Instance %s's node groups changed since"
                                   " locks were acquired, current groups"
                                   " are '%s', owning groups '%s'; retry the"
                                   " operation" %
                                   (instance_name,
                                    utils.CommaJoin(inst_groups),
                                    utils.CommaJoin(owned_groups)),
                                   errors.ECODE_STATE)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpExecError("There are no possible target groups")

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.glm.list_owned(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the matching (path, tag) pairs.

    """
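    # Sketch of the result shape, with made-up object names and tags purely
    # for illustration: the list built below holds (path, tag) pairs such as
    #   [("/cluster", "env:prod"), ("/instances/web1", "env:prod")]
    # where the path prefixes ("/cluster", "/instances/", "/nodes/",
    # "/nodegroup/") come from the targets assembled just below.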
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
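    # Note on the semantics implemented below: repeat == 0 still performs the
    # delay exactly once, while repeat == N (N > 0) performs it N times and
    # logs each iteration.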
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
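    # Rough protocol sketch, as implemented below: bind a fresh Unix socket in
    # a temporary directory, hand its path to the callback, wait up to
    # _CLIENT_CONNECT_TIMEOUT seconds for the client to connect, and then up
    # to _CLIENT_CONFIRM_TIMEOUT seconds for it to send a single confirmation
    # byte before cleaning up.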
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
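  # Illustrative usage sketch (not executed; it mirrors how the LUs in this
  # module drive the class, e.g. LUGroupEvacuate.Exec and LUTestAllocator.Exec):
  #
  #   ial = IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
  #                    instances=instances, target_groups=target_uuids)
  #   ial.Run(allocator_name)
  #   if not ial.success:
  #     ...  # ial.info carries the error message, ial.result the payload
  #
  # The keyword arguments accepted for each mode are listed in _MODE_DATA
  # below.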
  # pylint: disable-msg=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
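    # Hedged illustration of the structure built below (names and UUID made
    # up): {"<group-uuid>": {"name": "default", "alloc_policy": "preferred"}}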
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
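    # In rough terms: for every online, undrained node the RPC results are
    # validated, and "memory_free" is reduced by the difference between each
    # primary instance's configured BE memory and the memory it currently
    # uses, so the allocator sees free memory as if all primaries ran at
    # their full configured size.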
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable-msg=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

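  # Per the type checks above, the node-evacuate/change-group result used in
  # _MODE_DATA below is a 3-element list: "moved" entries (each a triple of
  # two non-empty strings plus a node-name list, presumably instance name,
  # target group and new nodes), "failed" entries as (name, reason) pairs,
  # and follow-up jobs restricted to failover/migrate/replace-disks opcodes.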
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_MEVAC:
      (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
       ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
                     constants.IALLOCATOR_MODE_MEVAC):
      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      if self.mode == constants.IALLOCATOR_MODE_RELOC:
        assert self.relocate_from is not None
        assert self.required_nodes == 1

        request_groups = fn(self.relocate_from)
        result_groups = fn(rdict["result"])

        if result_groups != request_groups:
          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                   " differ from original groups (%s)" %
                                   (utils.CommaJoin(result_groups),
                                    utils.CommaJoin(request_groups)))
      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
        request_groups = fn(self.evac_nodes)
        for (instance_name, secnode) in self.result:
          result_groups = fn([secnode])
          if result_groups != request_groups:
            raise errors.OpExecError("Iallocator returned new secondary node"
                                     " '%s' (group '%s') for instance '%s'"
                                     " which is not in original group '%s'" %
                                     (secnode, utils.CommaJoin(result_groups),
                                      instance_name,
                                      utils.CommaJoin(request_groups)))
      else:
        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
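    # Hedged example with made-up names: given
    #   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
    #   groups = {"uuid-a": {"name": "group-a"}}
    # a call with nodes=["node1", "node2", "unknown"] returns
    # ["group-a", "uuid-b"]: unknown nodes are skipped, and groups missing
    # from the map fall back to their UUID.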
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
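  # For example, _GetQueryImplementation(constants.QR_INSTANCE) resolves to
  # _InstanceQuery via the _QUERY_IMPL table above; unknown names raise
  # OpPrereqError below.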
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)