#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools
import operator

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht

import ganeti.masterd.instance # pylint: disable-msg=W0611

class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs

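# Illustrative sketch (not part of the original module): how an LU's Exec
# method might hand follow-up work back to the job queue by returning a
# ResultWithJobs. "affected_instances" and the chosen opcode are placeholders
# used only for this example.
#
#   def Exec(self, feedback_fn):
#     ...do the immediate work...
#     # Each inner list becomes one job; mcpu.Processor._ProcessResult
#     # submits the jobs and records their IDs in the opcode result.
#     jobs = [[opcodes.OpInstanceStartup(instance_name=name)]
#             for name in affected_instances]
#     return ResultWithJobs(jobs, restarted=len(affected_instances))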

    
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.glm.list_owned(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]

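# Illustrative sketch (not part of the original module): a minimal
# LogicalUnit subclass following the rules listed in LogicalUnit's docstring
# above. The opcode it would serve, the hook path "node-ping" and the overall
# purpose are hypothetical; error handling is elided.
#
#   class LUNodePing(LogicalUnit):
#     HPATH = "node-ping"                 # hypothetical hook path
#     HTYPE = constants.HTYPE_NODE
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
#       self.needed_locks = {locking.LEVEL_NODE: [self.op.node_name]}
#
#     def BuildHooksEnv(self):
#       return {"OP_TARGET": self.op.node_name}
#
#     def BuildHooksNodes(self):
#       return ([self.cfg.GetMasterNode()], [self.op.node_name])
#
#     def CheckPrereq(self):
#       _CheckNodeOnline(self, self.op.node_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Pinging %s" % self.op.node_name)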

    
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")

class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError

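# Illustrative sketch (not part of the original module): how an LU can be
# built from tasklets. Locking stays in the LU; each tasklet implements only
# CheckPrereq and Exec, and LogicalUnit.CheckPrereq/Exec iterate over
# self.tasklets automatically. The names below are hypothetical.
#
#   class _PingTasklet(Tasklet):
#     def __init__(self, lu, node_name):
#       Tasklet.__init__(self, lu)
#       self.node_name = node_name
#
#     def CheckPrereq(self):
#       _CheckNodeOnline(self.lu, self.node_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Pinging %s" % self.node_name)
#
#   # Inside the owning LU's ExpandNames, after declaring locks:
#   #   self.tasklets = [_PingTasklet(self, name) for name in node_names]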

    
class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.glm.list_owned(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)

def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted

def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy

    
642
def _ReleaseLocks(lu, level, names=None, keep=None):
643
  """Releases locks owned by an LU.
644

645
  @type lu: L{LogicalUnit}
646
  @param level: Lock level
647
  @type names: list or None
648
  @param names: Names of locks to release
649
  @type keep: list or None
650
  @param keep: Names of locks to retain
651

652
  """
653
  assert not (keep is not None and names is not None), \
654
         "Only one of the 'names' and the 'keep' parameters can be given"
655

    
656
  if names is not None:
657
    should_release = names.__contains__
658
  elif keep:
659
    should_release = lambda name: name not in keep
660
  else:
661
    should_release = None
662

    
663
  if should_release:
664
    retain = []
665
    release = []
666

    
667
    # Determine which locks to release
668
    for name in lu.glm.list_owned(level):
669
      if should_release(name):
670
        release.append(name)
671
      else:
672
        retain.append(name)
673

    
674
    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
675

    
676
    # Release just some locks
677
    lu.glm.release(level, names=release)
678

    
679
    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
680
  else:
681
    # Release everything
682
    lu.glm.release(level)
683

    
684
    assert not lu.glm.is_owned(level), "No locks should be owned"
685

    
686
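# Illustrative example (not part of the original module): the mutually
# exclusive ways of calling _ReleaseLocks from within an LU; the node
# variables are placeholders:
#
#   # Keep only the locks on the nodes we still care about, release the rest
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[pnode, snode])
#   # Release exactly the named locks
#   _ReleaseLocks(self, locking.LEVEL_NODE, names=[old_node])
#   # Release every lock held at this level
#   _ReleaseLocks(self, locking.LEVEL_NODE)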

    
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)

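# Illustrative example (not part of the original module) of the mapping shape
# returned by _MapInstanceDisksToNodes; node, volume and instance names are
# made up:
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}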

    
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable-msg=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)

def _CheckNodeHasOS(lu, node, os_name, force_variant):
788
  """Ensure that a node supports a given OS.
789

790
  @param lu: the LU on behalf of which we make the check
791
  @param node: the node to check
792
  @param os_name: the OS to query about
793
  @param force_variant: whether to ignore variant errors
794
  @raise errors.OpPrereqError: if the node is not supporting the OS
795

796
  """
797
  result = lu.rpc.call_os_get(node, os_name)
798
  result.Raise("OS '%s' not in supported OS list for node %s" %
799
               (os_name, node),
800
               prereq=True, ecode=errors.ECODE_INVAL)
801
  if not force_variant:
802
    _CheckOSVariant(result.payload, os_name)
803

    
804

    
805
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
806
  """Ensure that a node has the given secondary ip.
807

808
  @type lu: L{LogicalUnit}
809
  @param lu: the LU on behalf of which we make the check
810
  @type node: string
811
  @param node: the node to check
812
  @type secondary_ip: string
813
  @param secondary_ip: the ip to check
814
  @type prereq: boolean
815
  @param prereq: whether to throw a prerequisite or an execute error
816
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
817
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
818

819
  """
820
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
821
  result.Raise("Failure checking secondary ip on node %s" % node,
822
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
823
  if not result.payload:
824
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
825
           " please fix and re-run this command" % secondary_ip)
826
    if prereq:
827
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
828
    else:
829
      raise errors.OpExecError(msg)
830

    
831

    
832
def _GetClusterDomainSecret():
833
  """Reads the cluster domain secret.
834

835
  """
836
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
837
                               strict=True)
838

    
839

    
840
def _CheckInstanceDown(lu, instance, reason):
841
  """Ensure that an instance is not running."""
842
  if instance.admin_up:
843
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
844
                               (instance.name, reason), errors.ECODE_STATE)
845

    
846
  pnode = instance.primary_node
847
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
848
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
849
              prereq=True, ecode=errors.ECODE_ENVIRON)
850

    
851
  if instance.name in ins_l.payload:
852
    raise errors.OpPrereqError("Instance %s is running, %s" %
853
                               (instance.name, reason), errors.ECODE_STATE)
854

    
855

    
856
def _ExpandItemName(fn, name, kind):
857
  """Expand an item name.
858

859
  @param fn: the function to use for expansion
860
  @param name: requested item name
861
  @param kind: text description ('Node' or 'Instance')
862
  @return: the resolved (full) name
863
  @raise errors.OpPrereqError: if the item is not found
864

865
  """
866
  full_name = fn(name)
867
  if full_name is None:
868
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
869
                               errors.ECODE_NOENT)
870
  return full_name
871

    
872

    
873
def _ExpandNodeName(cfg, name):
874
  """Wrapper over L{_ExpandItemName} for nodes."""
875
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
876

    
877

    
878
def _ExpandInstanceName(cfg, name):
879
  """Wrapper over L{_ExpandItemName} for instance."""
880
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
881

    
882

    
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env

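# Illustrative example (not part of the original module): a subset of the
# environment produced by _BuildInstanceHookEnv for an instance with one
# bridged NIC and one disk. Values are made up; the hooks runner later
# prefixes every key with "GANETI_":
#
#   {"OP_TARGET": "inst1.example.com",
#    "INSTANCE_NAME": "inst1.example.com",
#    "INSTANCE_PRIMARY": "node1.example.com",
#    "INSTANCE_STATUS": "up",
#    "INSTANCE_NIC_COUNT": 1,
#    "INSTANCE_NIC0_MODE": constants.NIC_MODE_BRIDGED,
#    "INSTANCE_NIC0_BRIDGE": "xen-br0",
#    "INSTANCE_DISK_COUNT": 1,
#    "INSTANCE_DISK0_SIZE": 10240,
#    "INSTANCE_DISK0_MODE": "rw"}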

    
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics

def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_up,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)

    
1091
def _CheckOSVariant(os_obj, name):
1092
  """Check whether an OS name conforms to the os variants specification.
1093

1094
  @type os_obj: L{objects.OS}
1095
  @param os_obj: OS object to check
1096
  @type name: string
1097
  @param name: OS name passed by the user, to check for validity
1098

1099
  """
1100
  variant = objects.OS.GetVariant(name)
1101
  if not os_obj.supported_variants:
1102
    if variant:
1103
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1104
                                 " passed)" % (os_obj.name, variant),
1105
                                 errors.ECODE_INVAL)
1106
    return
1107
  if not variant:
1108
    raise errors.OpPrereqError("OS name must include a variant",
1109
                               errors.ECODE_INVAL)
1110

    
1111
  if variant not in os_obj.supported_variants:
1112
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1113

    
1114

    
1115
def _GetNodeInstancesInner(cfg, fn):
1116
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1117

    
1118

    
1119
def _GetNodeInstances(cfg, node_name):
1120
  """Returns a list of all primary and secondary instances on a node.
1121

1122
  """
1123

    
1124
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1125

    
1126

    
1127
def _GetNodePrimaryInstances(cfg, node_name):
1128
  """Returns primary instances on a node.
1129

1130
  """
1131
  return _GetNodeInstancesInner(cfg,
1132
                                lambda inst: node_name == inst.primary_node)
1133

    
1134

    
1135
def _GetNodeSecondaryInstances(cfg, node_name):
1136
  """Returns secondary instances on a node.
1137

1138
  """
1139
  return _GetNodeInstancesInner(cfg,
1140
                                lambda inst: node_name in inst.secondary_nodes)
1141

    
1142

    
1143
def _GetStorageTypeArgs(cfg, storage_type):
1144
  """Returns the arguments for a storage type.
1145

1146
  """
1147
  # Special case for file storage
1148
  if storage_type == constants.ST_FILE:
1149
    # storage.FileStorage wants a list of storage directories
1150
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1151

    
1152
  return []
1153

    
1154

    
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Returns the indices of an instance's disks reported as faulty by a node.

  """
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator

class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master

def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)

def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data

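# Illustrative example (not part of the original module) of the list shape
# returned by _GetAllHypervisorParameters; hypervisor names, OS names and
# parameter values are made up:
#
#   [("cluster", "xen-pvm", {"kernel_path": "/boot/vmlinuz-xenU", ...}),
#    ("os debian-edgy", "xen-pvm", {...}),
#    ("instance inst1.example.com", "xen-pvm", {...})]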

    
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

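# Illustrative example (not part of the original module) of how a verify LU
# using the _VerifyErrors mix-in might report a problem from within Exec; the
# condition, error message and node name are made up:
#
#   self._ErrorIf(test, self.ENODEHV, node,
#                 "hypervisor verify failure: '%s'", hv_result)
#
# Without the opcode's error_codes option this reaches feedback_fn as e.g.
#   - ERROR: node node1.example.com: hypervisor verify failure: '...'
# and with error_codes enabled as the parseable form
#   - ERROR:ENODEHV:node:node1.example.com:hypervisor verify failure: '...'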

    
1461
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1462
  """Verifies the cluster config.
1463

1464
  """
1465
  REQ_BGL = True
1466

    
1467
  def _VerifyHVP(self, hvp_data):
1468
    """Verifies locally the syntax of the hypervisor parameters.
1469

1470
    """
1471
    for item, hv_name, hv_params in hvp_data:
1472
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1473
             (item, hv_name))
1474
      try:
1475
        hv_class = hypervisor.GetHypervisor(hv_name)
1476
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1477
        hv_class.CheckParameterSyntax(hv_params)
1478
      except errors.GenericError, err:
1479
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1480

    
1481
  def ExpandNames(self):
1482
    # Information can be safely retrieved as the BGL is acquired in exclusive
1483
    # mode
1484
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1485
    self.all_node_info = self.cfg.GetAllNodesInfo()
1486
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1487
    self.needed_locks = {}
1488

    
1489
  def Exec(self, feedback_fn):
1490
    """Verify integrity of cluster, performing various test on nodes.
1491

1492
    """
1493
    self.bad = False
1494
    self._feedback_fn = feedback_fn
1495

    
1496
    feedback_fn("* Verifying cluster config")
1497

    
1498
    for msg in self.cfg.VerifyConfig():
1499
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1500

    
1501
    feedback_fn("* Verifying cluster certificate files")
1502

    
1503
    for cert_filename in constants.ALL_CERT_FILES:
1504
      (errcode, msg) = _VerifyCertificate(cert_filename)
1505
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1506

    
1507
    feedback_fn("* Verifying hypervisor parameters")
1508

    
1509
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1510
                                                self.all_inst_info.values()))
1511

    
1512
    feedback_fn("* Verifying all nodes belong to an existing group")
1513

    
1514
    # We do this verification here because, should this bogus circumstance
1515
    # occur, it would never be caught by VerifyGroup, which only acts on
1516
    # nodes/instances reachable from existing node groups.
1517

    
1518
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1519
                         if node.group not in self.all_group_info)
1520

    
1521
    dangling_instances = {}
1522
    no_node_instances = []
1523

    
1524
    for inst in self.all_inst_info.values():
1525
      if inst.primary_node in dangling_nodes:
1526
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1527
      elif inst.primary_node not in self.all_node_info:
1528
        no_node_instances.append(inst.name)
1529

    
1530
    pretty_dangling = [
1531
        "%s (%s)" %
1532
        (node.name,
1533
         utils.CommaJoin(dangling_instances.get(node.name,
1534
                                                ["no instances"])))
1535
        for node in dangling_nodes]
1536

    
1537
    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1538
                  "the following nodes (and their instances) belong to a non"
1539
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1540

    
1541
    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1542
                  "the following instances have a non-existing primary-node:"
1543
                  " %s", utils.CommaJoin(no_node_instances))
1544

    
1545
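    # Besides the overall status, return the group names so that the caller
    # can schedule per-group verification for each of them.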
    return (not self.bad, [g.name for g in self.all_group_info.values()])
1546

    
1547

    
1548
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1549
  """Verifies the status of a node group.
1550

1551
  """
1552
  HPATH = "cluster-verify"
1553
  HTYPE = constants.HTYPE_CLUSTER
1554
  REQ_BGL = False
1555

    
1556
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1557

    
1558
  class NodeImage(object):
1559
    """A class representing the logical and physical status of a node.
1560

1561
    @type name: string
1562
    @ivar name: the node name to which this object refers
1563
    @ivar volumes: a structure as returned from
1564
        L{ganeti.backend.GetVolumeList} (runtime)
1565
    @ivar instances: a list of running instances (runtime)
1566
    @ivar pinst: list of configured primary instances (config)
1567
    @ivar sinst: list of configured secondary instances (config)
1568
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1569
        instances for which this node is secondary (config)
1570
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1571
    @ivar dfree: free disk, as reported by the node (runtime)
1572
    @ivar offline: the offline status (config)
1573
    @type rpc_fail: boolean
1574
    @ivar rpc_fail: whether the RPC verify call failed (overall,
1575
        not whether the individual keys were correct) (runtime)
1576
    @type lvm_fail: boolean
1577
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1578
    @type hyp_fail: boolean
1579
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1580
    @type ghost: boolean
1581
    @ivar ghost: whether this is a known node or not (config)
1582
    @type os_fail: boolean
1583
    @ivar os_fail: whether the RPC call didn't return valid OS data
1584
    @type oslist: list
1585
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1586
    @type vm_capable: boolean
1587
    @ivar vm_capable: whether the node can host instances
1588

1589
    """
1590
    def __init__(self, offline=False, name=None, vm_capable=True):
1591
      self.name = name
1592
      self.volumes = {}
1593
      self.instances = []
1594
      self.pinst = []
1595
      self.sinst = []
1596
      self.sbp = {}
1597
      self.mfree = 0
1598
      self.dfree = 0
1599
      self.offline = offline
1600
      self.vm_capable = vm_capable
1601
      self.rpc_fail = False
1602
      self.lvm_fail = False
1603
      self.hyp_fail = False
1604
      self.ghost = False
1605
      self.os_fail = False
1606
      self.oslist = {}
1607

    
1608
  def ExpandNames(self):
1609
    # This raises errors.OpPrereqError on its own:
1610
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1611

    
1612
    # Get instances in node group; this is unsafe and needs verification later
1613
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1614

    
1615
    self.needed_locks = {
1616
      locking.LEVEL_INSTANCE: inst_names,
1617
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1618
      locking.LEVEL_NODE: [],
1619
      }
1620

    
1621
    self.share_locks = _ShareAll()
1622

    
1623
  def DeclareLocks(self, level):
1624
    if level == locking.LEVEL_NODE:
1625
      # Get members of node group; this is unsafe and needs verification later
1626
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1627

    
1628
      all_inst_info = self.cfg.GetAllInstancesInfo()
1629

    
1630
      # In Exec(), we warn about mirrored instances that have primary and
1631
      # secondary living in separate node groups. To fully verify that
1632
      # volumes for these instances are healthy, we will need to do an
1633
      # extra call to their secondaries. We ensure here those nodes will
1634
      # be locked.
1635
      for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
1636
        # Important: access only the instances whose lock is owned
1637
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1638
          nodes.update(all_inst_info[inst].secondary_nodes)
1639

    
1640
      self.needed_locks[locking.LEVEL_NODE] = nodes
1641

    
1642
  def CheckPrereq(self):
1643
    group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1644
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1645

    
1646
    unlocked_nodes = \
1647
        group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1648

    
1649
    unlocked_instances = \
1650
        group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))
1651

    
1652
    if unlocked_nodes:
1653
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1654
                                 utils.CommaJoin(unlocked_nodes))
1655

    
1656
    if unlocked_instances:
1657
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1658
                                 utils.CommaJoin(unlocked_instances))
1659

    
1660
    self.all_node_info = self.cfg.GetAllNodesInfo()
1661
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1662

    
1663
    self.my_node_names = utils.NiceSort(group_nodes)
1664
    self.my_inst_names = utils.NiceSort(group_instances)
1665

    
1666
    self.my_node_info = dict((name, self.all_node_info[name])
1667
                             for name in self.my_node_names)
1668

    
1669
    self.my_inst_info = dict((name, self.all_inst_info[name])
1670
                             for name in self.my_inst_names)
1671

    
1672
    # We detect here the nodes that will need the extra RPC calls for verifying
1673
    # split LV volumes; they should be locked.
1674
    extra_lv_nodes = set()
1675

    
1676
    for inst in self.my_inst_info.values():
1677
      if inst.disk_template in constants.DTS_INT_MIRROR:
1678
        group = self.my_node_info[inst.primary_node].group
1679
        for nname in inst.secondary_nodes:
1680
          if self.all_node_info[nname].group != group:
1681
            extra_lv_nodes.add(nname)
1682

    
1683
    unlocked_lv_nodes = \
1684
        extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1685

    
1686
    if unlocked_lv_nodes:
1687
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1688
                                 utils.CommaJoin(unlocked_lv_nodes))
1689
    self.extra_lv_nodes = list(extra_lv_nodes)
1690

    
1691
  def _VerifyNode(self, ninfo, nresult):
1692
    """Perform some basic validation on data returned from a node.
1693

1694
      - check the result data structure is well formed and has all the
1695
        mandatory fields
1696
      - check ganeti version
1697

1698
    @type ninfo: L{objects.Node}
1699
    @param ninfo: the node to check
1700
    @param nresult: the results from the node
1701
    @rtype: boolean
1702
    @return: whether overall this call was successful (and we can expect
1703
         reasonable values in the response)
1704

1705
    """
1706
    node = ninfo.name
1707
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1708

    
1709
    # main result, nresult should be a non-empty dict
1710
    test = not nresult or not isinstance(nresult, dict)
1711
    _ErrorIf(test, self.ENODERPC, node,
1712
                  "unable to verify node: no data returned")
1713
    if test:
1714
      return False
1715

    
1716
    # compares ganeti version
1717
    local_version = constants.PROTOCOL_VERSION
1718
    remote_version = nresult.get("version", None)
1719
    test = not (remote_version and
1720
                isinstance(remote_version, (list, tuple)) and
1721
                len(remote_version) == 2)
1722
    _ErrorIf(test, self.ENODERPC, node,
1723
             "connection to node returned invalid data")
1724
    if test:
1725
      return False
1726

    
1727
    test = local_version != remote_version[0]
1728
    _ErrorIf(test, self.ENODEVERSION, node,
1729
             "incompatible protocol versions: master %s,"
1730
             " node %s", local_version, remote_version[0])
1731
    if test:
1732
      return False
1733

    
1734
    # node seems compatible, we can actually try to look into its results
1735

    
1736
    # full package version
1737
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1738
                  self.ENODEVERSION, node,
1739
                  "software version mismatch: master %s, node %s",
1740
                  constants.RELEASE_VERSION, remote_version[1],
1741
                  code=self.ETYPE_WARNING)
1742

    
1743
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1744
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1745
      for hv_name, hv_result in hyp_result.iteritems():
1746
        test = hv_result is not None
1747
        _ErrorIf(test, self.ENODEHV, node,
1748
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1749

    
1750
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1751
    if ninfo.vm_capable and isinstance(hvp_result, list):
1752
      for item, hv_name, hv_result in hvp_result:
1753
        _ErrorIf(True, self.ENODEHV, node,
1754
                 "hypervisor %s parameter verify failure (source %s): %s",
1755
                 hv_name, item, hv_result)
1756

    
1757
    test = nresult.get(constants.NV_NODESETUP,
1758
                       ["Missing NODESETUP results"])
1759
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1760
             "; ".join(test))
1761

    
1762
    return True
1763

    
1764
  def _VerifyNodeTime(self, ninfo, nresult,
1765
                      nvinfo_starttime, nvinfo_endtime):
1766
    """Check the node time.
1767

1768
    @type ninfo: L{objects.Node}
1769
    @param ninfo: the node to check
1770
    @param nresult: the remote results for the node
1771
    @param nvinfo_starttime: the start time of the RPC call
1772
    @param nvinfo_endtime: the end time of the RPC call
1773

1774
    """
1775
    node = ninfo.name
1776
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1777

    
1778
    ntime = nresult.get(constants.NV_TIME, None)
1779
    try:
1780
      ntime_merged = utils.MergeTime(ntime)
1781
    except (ValueError, TypeError):
1782
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1783
      return
1784

    
1785
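    # The node's time must fall within the RPC call window, extended on both
    # sides by the maximum allowed clock skew.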
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1786
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1787
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1788
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1789
    else:
1790
      ntime_diff = None
1791

    
1792
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1793
             "Node time diverges by at least %s from master node time",
1794
             ntime_diff)
1795

    
1796
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1797
    """Check the node LVM results.
1798

1799
    @type ninfo: L{objects.Node}
1800
    @param ninfo: the node to check
1801
    @param nresult: the remote results for the node
1802
    @param vg_name: the configured VG name
1803

1804
    """
1805
    if vg_name is None:
1806
      return
1807

    
1808
    node = ninfo.name
1809
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1810

    
1811
    # checks vg existence and size > 20G
1812
    vglist = nresult.get(constants.NV_VGLIST, None)
1813
    test = not vglist
1814
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1815
    if not test:
1816
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1817
                                            constants.MIN_VG_SIZE)
1818
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1819

    
1820
    # check pv names
1821
    pvlist = nresult.get(constants.NV_PVLIST, None)
1822
    test = pvlist is None
1823
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1824
    if not test:
1825
      # check that ':' is not present in PV names, since it's a
1826
      # special character for lvcreate (denotes the range of PEs to
1827
      # use on the PV)
1828
      for _, pvname, owner_vg in pvlist:
1829
        test = ":" in pvname
1830
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1831
                 " '%s' of VG '%s'", pvname, owner_vg)
1832

    
1833
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1834
    """Check the node bridges.
1835

1836
    @type ninfo: L{objects.Node}
1837
    @param ninfo: the node to check
1838
    @param nresult: the remote results for the node
1839
    @param bridges: the expected list of bridges
1840

1841
    """
1842
    if not bridges:
1843
      return
1844

    
1845
    node = ninfo.name
1846
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1847

    
1848
    missing = nresult.get(constants.NV_BRIDGES, None)
1849
    test = not isinstance(missing, list)
1850
    _ErrorIf(test, self.ENODENET, node,
1851
             "did not return valid bridge information")
1852
    if not test:
1853
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1854
               utils.CommaJoin(sorted(missing)))
1855

    
1856
  def _VerifyNodeNetwork(self, ninfo, nresult):
1857
    """Check the node network connectivity results.
1858

1859
    @type ninfo: L{objects.Node}
1860
    @param ninfo: the node to check
1861
    @param nresult: the remote results for the node
1862

1863
    """
1864
    node = ninfo.name
1865
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1866

    
1867
    test = constants.NV_NODELIST not in nresult
1868
    _ErrorIf(test, self.ENODESSH, node,
1869
             "node hasn't returned node ssh connectivity data")
1870
    if not test:
1871
      if nresult[constants.NV_NODELIST]:
1872
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1873
          _ErrorIf(True, self.ENODESSH, node,
1874
                   "ssh communication with node '%s': %s", a_node, a_msg)
1875

    
1876
    test = constants.NV_NODENETTEST not in nresult
1877
    _ErrorIf(test, self.ENODENET, node,
1878
             "node hasn't returned node tcp connectivity data")
1879
    if not test:
1880
      if nresult[constants.NV_NODENETTEST]:
1881
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1882
        for anode in nlist:
1883
          _ErrorIf(True, self.ENODENET, node,
1884
                   "tcp communication with node '%s': %s",
1885
                   anode, nresult[constants.NV_NODENETTEST][anode])
1886

    
1887
    test = constants.NV_MASTERIP not in nresult
1888
    _ErrorIf(test, self.ENODENET, node,
1889
             "node hasn't returned node master IP reachability data")
1890
    if not test:
1891
      if not nresult[constants.NV_MASTERIP]:
1892
        if node == self.master_node:
1893
          msg = "the master node cannot reach the master IP (not configured?)"
1894
        else:
1895
          msg = "cannot reach the master IP"
1896
        _ErrorIf(True, self.ENODENET, node, msg)
1897

    
1898
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1899
                      diskstatus):
1900
    """Verify an instance.
1901

1902
    This function checks to see if the required block devices are
1903
    available on the instance's node.
1904

1905
    """
1906
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1907
    node_current = instanceconfig.primary_node
1908

    
1909
    node_vol_should = {}
1910
    instanceconfig.MapLVsByNode(node_vol_should)
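    # node_vol_should now maps each node name to the LVs this instance is
    # expected to have on that node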
1911

    
1912
    for node in node_vol_should:
1913
      n_img = node_image[node]
1914
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1915
        # ignore missing volumes on offline or broken nodes
1916
        continue
1917
      for volume in node_vol_should[node]:
1918
        test = volume not in n_img.volumes
1919
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1920
                 "volume %s missing on node %s", volume, node)
1921

    
1922
    if instanceconfig.admin_up:
1923
      pri_img = node_image[node_current]
1924
      test = instance not in pri_img.instances and not pri_img.offline
1925
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1926
               "instance not running on its primary node %s",
1927
               node_current)
1928

    
1929
    diskdata = [(nname, success, status, idx)
1930
                for (nname, disks) in diskstatus.items()
1931
                for idx, (success, status) in enumerate(disks)]
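    # diskdata flattens diskstatus into (node name, success, status, disk
    # index) tuples, one entry per disk and node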
1932

    
1933
    for nname, success, bdev_status, idx in diskdata:
1934
      # the 'ghost node' construction in Exec() ensures that we have a
1935
      # node here
1936
      snode = node_image[nname]
1937
      bad_snode = snode.ghost or snode.offline
1938
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1939
               self.EINSTANCEFAULTYDISK, instance,
1940
               "couldn't retrieve status for disk/%s on %s: %s",
1941
               idx, nname, bdev_status)
1942
      _ErrorIf((instanceconfig.admin_up and success and
1943
                bdev_status.ldisk_status == constants.LDS_FAULTY),
1944
               self.EINSTANCEFAULTYDISK, instance,
1945
               "disk/%s on %s is faulty", idx, nname)
1946

    
1947
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1948
    """Verify if there are any unknown volumes in the cluster.
1949

1950
    The .os, .swap and backup volumes are ignored. All other volumes are
1951
    reported as unknown.
1952

1953
    @type reserved: L{ganeti.utils.FieldSet}
1954
    @param reserved: a FieldSet of reserved volume names
1955

1956
    """
1957
    for node, n_img in node_image.items():
1958
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1959
        # skip non-healthy nodes
1960
        continue
1961
      for volume in n_img.volumes:
1962
        test = ((node not in node_vol_should or
1963
                volume not in node_vol_should[node]) and
1964
                not reserved.Matches(volume))
1965
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1966
                      "volume %s is unknown", volume)
1967

    
1968
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1969
    """Verify N+1 Memory Resilience.
1970

1971
    Check that if one single node dies we can still start all the
1972
    instances it was primary for.
1973

1974
    """
1975
    cluster_info = self.cfg.GetClusterInfo()
1976
    for node, n_img in node_image.items():
1977
      # This code checks that every node which is now listed as
1978
      # secondary has enough memory to host all instances it is
1979
      # supposed to should a single other node in the cluster fail.
1980
      # FIXME: not ready for failover to an arbitrary node
1981
      # FIXME: does not support file-backed instances
1982
      # WARNING: we currently take into account down instances as well
1983
      # as up ones, considering that even if they're down someone
1984
      # might want to start them even in the event of a node failure.
1985
      if n_img.offline:
1986
        # we're skipping offline nodes from the N+1 warning, since
1987
        # most likely we don't have good memory information from them;
1988
        # we already list instances living on such nodes, and that's
1989
        # enough warning
1990
        continue
1991
      for prinode, instances in n_img.sbp.items():
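        # Sum the memory needed to fail over every auto-balanced instance
        # whose primary node is prinode onto this node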
1992
        needed_mem = 0
1993
        for instance in instances:
1994
          bep = cluster_info.FillBE(instance_cfg[instance])
1995
          if bep[constants.BE_AUTO_BALANCE]:
1996
            needed_mem += bep[constants.BE_MEMORY]
1997
        test = n_img.mfree < needed_mem
1998
        self._ErrorIf(test, self.ENODEN1, node,
1999
                      "not enough memory to accomodate instance failovers"
2000
                      " should node %s fail (%dMiB needed, %dMiB available)",
2001
                      prinode, needed_mem, n_img.mfree)
2002

    
2003
  @classmethod
2004
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2005
                   (files_all, files_all_opt, files_mc, files_vm)):
2006
    """Verifies file checksums collected from all nodes.
2007

2008
    @param errorif: Callback for reporting errors
2009
    @param nodeinfo: List of L{objects.Node} objects
2010
    @param master_node: Name of master node
2011
    @param all_nvinfo: RPC results
2012

2013
    """
2014
    node_names = frozenset(node.name for node in nodeinfo if not node.offline)
2015

    
2016
    assert master_node in node_names
2017
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2018
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2019
           "Found file listed in more than one file list"
2020

    
2021
    # Define functions determining which nodes to consider for a file
2022
    file2nodefn = dict([(filename, fn)
2023
      for (files, fn) in [(files_all, None),
2024
                          (files_all_opt, None),
2025
                          (files_mc, lambda node: (node.master_candidate or
2026
                                                   node.name == master_node)),
2027
                          (files_vm, lambda node: node.vm_capable)]
2028
      for filename in files])
2029

    
2030
    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
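    # fileinfo: file name -> {checksum -> set of node names reporting it}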
2031

    
2032
    for node in nodeinfo:
2033
      if node.offline:
2034
        continue
2035

    
2036
      nresult = all_nvinfo[node.name]
2037

    
2038
      if nresult.fail_msg or not nresult.payload:
2039
        node_files = None
2040
      else:
2041
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2042

    
2043
      test = not (node_files and isinstance(node_files, dict))
2044
      errorif(test, cls.ENODEFILECHECK, node.name,
2045
              "Node did not return file checksum data")
2046
      if test:
2047
        continue
2048

    
2049
      for (filename, checksum) in node_files.items():
2050
        # Check if the file should be considered for a node
2051
        fn = file2nodefn[filename]
2052
        if fn is None or fn(node):
2053
          fileinfo[filename].setdefault(checksum, set()).add(node.name)
2054

    
2055
    for (filename, checksums) in fileinfo.items():
2056
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2057

    
2058
      # Nodes having the file
2059
      with_file = frozenset(node_name
2060
                            for nodes in fileinfo[filename].values()
2061
                            for node_name in nodes)
2062

    
2063
      # Nodes missing file
2064
      missing_file = node_names - with_file
2065

    
2066
      if filename in files_all_opt:
2067
        # All or no nodes
2068
        errorif(missing_file and missing_file != node_names,
2069
                cls.ECLUSTERFILECHECK, None,
2070
                "File %s is optional, but it must exist on all or no"
2071
                " nodes (not found on %s)",
2072
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2073
      else:
2074
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2075
                "File %s is missing from node(s) %s", filename,
2076
                utils.CommaJoin(utils.NiceSort(missing_file)))
2077

    
2078
      # See if there are multiple versions of the file
2079
      test = len(checksums) > 1
2080
      if test:
2081
        variants = ["variant %s on %s" %
2082
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2083
                    for (idx, (checksum, nodes)) in
2084
                      enumerate(sorted(checksums.items()))]
2085
      else:
2086
        variants = []
2087

    
2088
      errorif(test, cls.ECLUSTERFILECHECK, None,
2089
              "File %s found with %s different checksums (%s)",
2090
              filename, len(checksums), "; ".join(variants))
2091

    
2092
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2093
                      drbd_map):
2094
    """Verifies and the node DRBD status.
2095

2096
    @type ninfo: L{objects.Node}
2097
    @param ninfo: the node to check
2098
    @param nresult: the remote results for the node
2099
    @param instanceinfo: the dict of instances
2100
    @param drbd_helper: the configured DRBD usermode helper
2101
    @param drbd_map: the DRBD map as returned by
2102
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2103

2104
    """
2105
    node = ninfo.name
2106
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2107

    
2108
    if drbd_helper:
2109
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2110
      test = (helper_result is None)
2111
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
2112
               "no drbd usermode helper returned")
2113
      if helper_result:
2114
        status, payload = helper_result
2115
        test = not status
2116
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2117
                 "drbd usermode helper check unsuccessful: %s", payload)
2118
        test = status and (payload != drbd_helper)
2119
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2120
                 "wrong drbd usermode helper: %s", payload)
2121

    
2122
    # compute the DRBD minors
2123
    node_drbd = {}
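    # node_drbd: DRBD minor -> (instance name, whether the minor must be in
    # use because the instance is supposed to be running)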
2124
    for minor, instance in drbd_map[node].items():
2125
      test = instance not in instanceinfo
2126
      _ErrorIf(test, self.ECLUSTERCFG, None,
2127
               "ghost instance '%s' in temporary DRBD map", instance)
2128
        # ghost instance should not be running, but otherwise we
2129
        # don't give double warnings (both ghost instance and
2130
        # unallocated minor in use)
2131
      if test:
2132
        node_drbd[minor] = (instance, False)
2133
      else:
2134
        instance = instanceinfo[instance]
2135
        node_drbd[minor] = (instance.name, instance.admin_up)
2136

    
2137
    # and now check them
2138
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2139
    test = not isinstance(used_minors, (tuple, list))
2140
    _ErrorIf(test, self.ENODEDRBD, node,
2141
             "cannot parse drbd status file: %s", str(used_minors))
2142
    if test:
2143
      # we cannot check drbd status
2144
      return
2145

    
2146
    for minor, (iname, must_exist) in node_drbd.items():
2147
      test = minor not in used_minors and must_exist
2148
      _ErrorIf(test, self.ENODEDRBD, node,
2149
               "drbd minor %d of instance %s is not active", minor, iname)
2150
    for minor in used_minors:
2151
      test = minor not in node_drbd
2152
      _ErrorIf(test, self.ENODEDRBD, node,
2153
               "unallocated drbd minor %d is in use", minor)
2154

    
2155
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2156
    """Builds the node OS structures.
2157

2158
    @type ninfo: L{objects.Node}
2159
    @param ninfo: the node to check
2160
    @param nresult: the remote results for the node
2161
    @param nimg: the node image object
2162

2163
    """
2164
    node = ninfo.name
2165
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2166

    
2167
    remote_os = nresult.get(constants.NV_OSLIST, None)
2168
    test = (not isinstance(remote_os, list) or
2169
            not compat.all(isinstance(v, list) and len(v) == 7
2170
                           for v in remote_os))
2171

    
2172
    _ErrorIf(test, self.ENODEOS, node,
2173
             "node hasn't returned valid OS data")
2174

    
2175
    nimg.os_fail = test
2176

    
2177
    if test:
2178
      return
2179

    
2180
    os_dict = {}
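    # os_dict: OS name -> list of (path, status, diagnose message, variants
    # set, parameters set, API versions set) tuples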
2181

    
2182
    for (name, os_path, status, diagnose,
2183
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2184

    
2185
      if name not in os_dict:
2186
        os_dict[name] = []
2187

    
2188
      # parameters is a list of lists instead of list of tuples due to
2189
      # JSON lacking a real tuple type, fix it:
2190
      parameters = [tuple(v) for v in parameters]
2191
      os_dict[name].append((os_path, status, diagnose,
2192
                            set(variants), set(parameters), set(api_ver)))
2193

    
2194
    nimg.oslist = os_dict
2195

    
2196
  def _VerifyNodeOS(self, ninfo, nimg, base):
2197
    """Verifies the node OS list.
2198

2199
    @type ninfo: L{objects.Node}
2200
    @param ninfo: the node to check
2201
    @param nimg: the node image object
2202
    @param base: the 'template' node we match against (e.g. from the master)
2203

2204
    """
2205
    node = ninfo.name
2206
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2207

    
2208
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2209

    
2210
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2211
    for os_name, os_data in nimg.oslist.items():
2212
      assert os_data, "Empty OS status for OS %s?!" % os_name
2213
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2214
      _ErrorIf(not f_status, self.ENODEOS, node,
2215
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2216
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2217
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2218
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2219
      # comparisons with the 'base' image
2220
      test = os_name not in base.oslist
2221
      _ErrorIf(test, self.ENODEOS, node,
2222
               "Extra OS %s not present on reference node (%s)",
2223
               os_name, base.name)
2224
      if test:
2225
        continue
2226
      assert base.oslist[os_name], "Base node has empty OS status?"
2227
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2228
      if not b_status:
2229
        # base OS is invalid, skipping
2230
        continue
2231
      for kind, a, b in [("API version", f_api, b_api),
2232
                         ("variants list", f_var, b_var),
2233
                         ("parameters", beautify_params(f_param),
2234
                          beautify_params(b_param))]:
2235
        _ErrorIf(a != b, self.ENODEOS, node,
2236
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2237
                 kind, os_name, base.name,
2238
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2239

    
2240
    # check any missing OSes
2241
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2242
    _ErrorIf(missing, self.ENODEOS, node,
2243
             "OSes present on reference node %s but missing on this node: %s",
2244
             base.name, utils.CommaJoin(missing))
2245

    
2246
  def _VerifyOob(self, ninfo, nresult):
2247
    """Verifies out of band functionality of a node.
2248

2249
    @type ninfo: L{objects.Node}
2250
    @param ninfo: the node to check
2251
    @param nresult: the remote results for the node
2252

2253
    """
2254
    node = ninfo.name
2255
    # We just have to verify the paths on master and/or master candidates
2256
    # as the oob helper is invoked on the master
2257
    if ((ninfo.master_candidate or ninfo.master_capable) and
2258
        constants.NV_OOB_PATHS in nresult):
2259
      for path_result in nresult[constants.NV_OOB_PATHS]:
2260
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2261

    
2262
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2263
    """Verifies and updates the node volume data.
2264

2265
    This function will update a L{NodeImage}'s internal structures
2266
    with data from the remote call.
2267

2268
    @type ninfo: L{objects.Node}
2269
    @param ninfo: the node to check
2270
    @param nresult: the remote results for the node
2271
    @param nimg: the node image object
2272
    @param vg_name: the configured VG name
2273

2274
    """
2275
    node = ninfo.name
2276
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2277

    
2278
    nimg.lvm_fail = True
2279
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2280
    if vg_name is None:
2281
      pass
2282
    elif isinstance(lvdata, basestring):
2283
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2284
               utils.SafeEncode(lvdata))
2285
    elif not isinstance(lvdata, dict):
2286
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2287
    else:
2288
      nimg.volumes = lvdata
2289
      nimg.lvm_fail = False
2290

    
2291
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2292
    """Verifies and updates the node instance list.
2293

2294
    If the listing was successful, then updates this node's instance
2295
    list. Otherwise, it marks the RPC call as failed for the instance
2296
    list key.
2297

2298
    @type ninfo: L{objects.Node}
2299
    @param ninfo: the node to check
2300
    @param nresult: the remote results for the node
2301
    @param nimg: the node image object
2302

2303
    """
2304
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2305
    test = not isinstance(idata, list)
2306
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2307
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2308
    if test:
2309
      nimg.hyp_fail = True
2310
    else:
2311
      nimg.instances = idata
2312

    
2313
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2314
    """Verifies and computes a node information map
2315

2316
    @type ninfo: L{objects.Node}
2317
    @param ninfo: the node to check
2318
    @param nresult: the remote results for the node
2319
    @param nimg: the node image object
2320
    @param vg_name: the configured VG name
2321

2322
    """
2323
    node = ninfo.name
2324
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2325

    
2326
    # try to read free memory (from the hypervisor)
2327
    hv_info = nresult.get(constants.NV_HVINFO, None)
2328
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2329
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2330
    if not test:
2331
      try:
2332
        nimg.mfree = int(hv_info["memory_free"])
2333
      except (ValueError, TypeError):
2334
        _ErrorIf(True, self.ENODERPC, node,
2335
                 "node returned invalid nodeinfo, check hypervisor")
2336

    
2337
    # FIXME: devise a free space model for file based instances as well
2338
    if vg_name is not None:
2339
      test = (constants.NV_VGLIST not in nresult or
2340
              vg_name not in nresult[constants.NV_VGLIST])
2341
      _ErrorIf(test, self.ENODELVM, node,
2342
               "node didn't return data for the volume group '%s'"
2343
               " - it is either missing or broken", vg_name)
2344
      if not test:
2345
        try:
2346
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2347
        except (ValueError, TypeError):
2348
          _ErrorIf(True, self.ENODERPC, node,
2349
                   "node returned invalid LVM info, check LVM status")
2350

    
2351
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2352
    """Gets per-disk status information for all instances.
2353

2354
    @type nodelist: list of strings
2355
    @param nodelist: Node names
2356
    @type node_image: dict of (name, L{objects.Node})
2357
    @param node_image: Node objects
2358
    @type instanceinfo: dict of (name, L{objects.Instance})
2359
    @param instanceinfo: Instance objects
2360
    @rtype: {instance: {node: [(success, payload)]}}
2361
    @return: a dictionary of per-instance dictionaries with nodes as
2362
        keys and disk information as values; the disk information is a
2363
        list of tuples (success, payload)
2364

2365
    """
2366
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2367

    
2368
    node_disks = {}
2369
    node_disks_devonly = {}
2370
    diskless_instances = set()
2371
    diskless = constants.DT_DISKLESS
2372

    
2373
    for nname in nodelist:
2374
      node_instances = list(itertools.chain(node_image[nname].pinst,
2375
                                            node_image[nname].sinst))
2376
      diskless_instances.update(inst for inst in node_instances
2377
                                if instanceinfo[inst].disk_template == diskless)
2378
      disks = [(inst, disk)
2379
               for inst in node_instances
2380
               for disk in instanceinfo[inst].disks]
2381

    
2382
      if not disks:
2383
        # No need to collect data
2384
        continue
2385

    
2386
      node_disks[nname] = disks
2387

    
2388
      # Creating copies as SetDiskID below will modify the objects and that can
2389
      # lead to incorrect data returned from nodes
2390
      devonly = [dev.Copy() for (_, dev) in disks]
2391

    
2392
      for dev in devonly:
2393
        self.cfg.SetDiskID(dev, nname)
2394

    
2395
      node_disks_devonly[nname] = devonly
2396

    
2397
    assert len(node_disks) == len(node_disks_devonly)
2398

    
2399
    # Collect data from all nodes with disks
2400
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2401
                                                          node_disks_devonly)
2402

    
2403
    assert len(result) == len(node_disks)
2404

    
2405
    instdisk = {}
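    # instdisk: instance name -> {node name -> [(success, payload), ...]},
    # one entry per disk, as described in the docstring above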
2406

    
2407
    for (nname, nres) in result.items():
2408
      disks = node_disks[nname]
2409

    
2410
      if nres.offline:
2411
        # No data from this node
2412
        data = len(disks) * [(False, "node offline")]
2413
      else:
2414
        msg = nres.fail_msg
2415
        _ErrorIf(msg, self.ENODERPC, nname,
2416
                 "while getting disk information: %s", msg)
2417
        if msg:
2418
          # No data from this node
2419
          data = len(disks) * [(False, msg)]
2420
        else:
2421
          data = []
2422
          for idx, i in enumerate(nres.payload):
2423
            if isinstance(i, (tuple, list)) and len(i) == 2:
2424
              data.append(i)
2425
            else:
2426
              logging.warning("Invalid result from node %s, entry %d: %s",
2427
                              nname, idx, i)
2428
              data.append((False, "Invalid result from the remote node"))
2429

    
2430
      for ((inst, _), status) in zip(disks, data):
2431
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2432

    
2433
    # Add empty entries for diskless instances.
2434
    for inst in diskless_instances:
2435
      assert inst not in instdisk
2436
      instdisk[inst] = {}
2437

    
2438
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2439
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2440
                      compat.all(isinstance(s, (tuple, list)) and
2441
                                 len(s) == 2 for s in statuses)
2442
                      for inst, nnames in instdisk.items()
2443
                      for nname, statuses in nnames.items())
2444
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2445

    
2446
    return instdisk
2447

    
2448
  def BuildHooksEnv(self):
2449
    """Build hooks env.
2450

2451
    Cluster-Verify hooks just ran in the post phase and their failure makes
2452
    the output be logged in the verify output and the verification to fail.
2453

2454
    """
2455
    env = {
2456
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2457
      }
2458

    
2459
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2460
               for node in self.my_node_info.values())
2461

    
2462
    return env
2463

    
2464
  def BuildHooksNodes(self):
2465
    """Build hooks nodes.
2466

2467
    """
2468
    return ([], self.my_node_names)
2469

    
2470
  def Exec(self, feedback_fn):
2471
    """Verify integrity of the node group, performing various test on nodes.
2472

2473
    """
2474
    # This method has too many local variables. pylint: disable-msg=R0914
2475

    
2476
    if not self.my_node_names:
2477
      # empty node group
2478
      feedback_fn("* Empty node group, skipping verification")
2479
      return True
2480

    
2481
    self.bad = False
2482
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2483
    verbose = self.op.verbose
2484
    self._feedback_fn = feedback_fn
2485

    
2486
    vg_name = self.cfg.GetVGName()
2487
    drbd_helper = self.cfg.GetDRBDHelper()
2488
    cluster = self.cfg.GetClusterInfo()
2489
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2490
    hypervisors = cluster.enabled_hypervisors
2491
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2492

    
2493
    i_non_redundant = [] # Non redundant instances
2494
    i_non_a_balanced = [] # Non auto-balanced instances
2495
    n_offline = 0 # Count of offline nodes
2496
    n_drained = 0 # Count of nodes being drained
2497
    node_vol_should = {}
2498

    
2499
    # FIXME: verify OS list
2500

    
2501
    # File verification
2502
    filemap = _ComputeAncillaryFiles(cluster, False)
2503

    
2504
    # do local checksums
2505
    master_node = self.master_node = self.cfg.GetMasterNode()
2506
    master_ip = self.cfg.GetMasterIP()
2507

    
2508
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2509

    
2510
    # We will make nodes contact all nodes in their group, and one node from
2511
    # every other group.
2512
    # TODO: should it be a *random* node, different every time?
2513
    online_nodes = [node.name for node in node_data_list if not node.offline]
2514
    other_group_nodes = {}
2515

    
2516
    for name in sorted(self.all_node_info):
2517
      node = self.all_node_info[name]
2518
      if (node.group not in other_group_nodes
2519
          and node.group != self.group_uuid
2520
          and not node.offline):
2521
        other_group_nodes[node.group] = node.name
2522

    
2523
    node_verify_param = {
2524
      constants.NV_FILELIST:
2525
        utils.UniqueSequence(filename
2526
                             for files in filemap
2527
                             for filename in files),
2528
      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2529
      constants.NV_HYPERVISOR: hypervisors,
2530
      constants.NV_HVPARAMS:
2531
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2532
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2533
                                 for node in node_data_list
2534
                                 if not node.offline],
2535
      constants.NV_INSTANCELIST: hypervisors,
2536
      constants.NV_VERSION: None,
2537
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2538
      constants.NV_NODESETUP: None,
2539
      constants.NV_TIME: None,
2540
      constants.NV_MASTERIP: (master_node, master_ip),
2541
      constants.NV_OSLIST: None,
2542
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2543
      }
2544

    
2545
    if vg_name is not None:
2546
      node_verify_param[constants.NV_VGLIST] = None
2547
      node_verify_param[constants.NV_LVLIST] = vg_name
2548
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2549
      node_verify_param[constants.NV_DRBDLIST] = None
2550

    
2551
    if drbd_helper:
2552
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2553

    
2554
    # bridge checks
2555
    # FIXME: this needs to be changed per node-group, not cluster-wide
2556
    bridges = set()
2557
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2558
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2559
      bridges.add(default_nicpp[constants.NIC_LINK])
2560
    for instance in self.my_inst_info.values():
2561
      for nic in instance.nics:
2562
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2563
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2564
          bridges.add(full_nic[constants.NIC_LINK])
2565

    
2566
    if bridges:
2567
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2568

    
2569
    # Build our expected cluster state
2570
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2571
                                                 name=node.name,
2572
                                                 vm_capable=node.vm_capable))
2573
                      for node in node_data_list)
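    # node_image: node name -> NodeImage, pre-filled with configuration
    # data; runtime fields are filled in below from the RPC results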
2574

    
2575
    # Gather OOB paths
2576
    oob_paths = []
2577
    for node in self.all_node_info.values():
2578
      path = _SupportsOob(self.cfg, node)
2579
      if path and path not in oob_paths:
2580
        oob_paths.append(path)
2581

    
2582
    if oob_paths:
2583
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2584

    
2585
    for instance in self.my_inst_names:
2586
      inst_config = self.my_inst_info[instance]
2587

    
2588
      for nname in inst_config.all_nodes:
2589
        if nname not in node_image:
2590
          gnode = self.NodeImage(name=nname)
2591
          gnode.ghost = (nname not in self.all_node_info)
2592
          node_image[nname] = gnode
2593

    
2594
      inst_config.MapLVsByNode(node_vol_should)
2595

    
2596
      pnode = inst_config.primary_node
2597
      node_image[pnode].pinst.append(instance)
2598

    
2599
      for snode in inst_config.secondary_nodes:
2600
        nimg = node_image[snode]
2601
        nimg.sinst.append(instance)
2602
        if pnode not in nimg.sbp:
2603
          nimg.sbp[pnode] = []
2604
        nimg.sbp[pnode].append(instance)
2605

    
2606
    # At this point, we have the in-memory data structures complete,
2607
    # except for the runtime information, which we'll gather next
2608

    
2609
    # Due to the way our RPC system works, exact response times cannot be
2610
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2611
    # time before and after executing the request, we can at least have a time
2612
    # window.
2613
    nvinfo_starttime = time.time()
2614
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2615
                                           node_verify_param,
2616
                                           self.cfg.GetClusterName())
2617
    nvinfo_endtime = time.time()
2618

    
2619
    if self.extra_lv_nodes and vg_name is not None:
2620
      extra_lv_nvinfo = \
2621
          self.rpc.call_node_verify(self.extra_lv_nodes,
2622
                                    {constants.NV_LVLIST: vg_name},
2623
                                    self.cfg.GetClusterName())
2624
    else:
2625
      extra_lv_nvinfo = {}
2626

    
2627
    all_drbd_map = self.cfg.ComputeDRBDMap()
2628

    
2629
    feedback_fn("* Gathering disk information (%s nodes)" %
2630
                len(self.my_node_names))
2631
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2632
                                     self.my_inst_info)
2633

    
2634
    feedback_fn("* Verifying configuration file consistency")
2635

    
2636
    # If not all nodes are being checked, we need to make sure the master node
2637
    # and a non-checked vm_capable node are in the list.
2638
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2639
    if absent_nodes:
2640
      vf_nvinfo = all_nvinfo.copy()
2641
      vf_node_info = list(self.my_node_info.values())
2642
      additional_nodes = []
2643
      if master_node not in self.my_node_info:
2644
        additional_nodes.append(master_node)
2645
        vf_node_info.append(self.all_node_info[master_node])
2646
      # Add the first vm_capable node we find which is not included
2647
      for node in absent_nodes:
2648
        nodeinfo = self.all_node_info[node]
2649
        if nodeinfo.vm_capable and not nodeinfo.offline:
2650
          additional_nodes.append(node)
2651
          vf_node_info.append(self.all_node_info[node])
2652
          break
2653
      key = constants.NV_FILELIST
2654
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2655
                                                 {key: node_verify_param[key]},
2656
                                                 self.cfg.GetClusterName()))
2657
    else:
2658
      vf_nvinfo = all_nvinfo
2659
      vf_node_info = self.my_node_info.values()
2660

    
2661
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2662

    
2663
    feedback_fn("* Verifying node status")
2664

    
2665
    refos_img = None
2666

    
2667
    for node_i in node_data_list:
2668
      node = node_i.name
2669
      nimg = node_image[node]
2670

    
2671
      if node_i.offline:
2672
        if verbose:
2673
          feedback_fn("* Skipping offline node %s" % (node,))
2674
        n_offline += 1
2675
        continue
2676

    
2677
      if node == master_node:
2678
        ntype = "master"
2679
      elif node_i.master_candidate:
2680
        ntype = "master candidate"
2681
      elif node_i.drained:
2682
        ntype = "drained"
2683
        n_drained += 1
2684
      else:
2685
        ntype = "regular"
2686
      if verbose:
2687
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2688

    
2689
      msg = all_nvinfo[node].fail_msg
2690
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2691
      if msg:
2692
        nimg.rpc_fail = True
2693
        continue
2694

    
2695
      nresult = all_nvinfo[node].payload
2696

    
2697
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2698
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2699
      self._VerifyNodeNetwork(node_i, nresult)
2700
      self._VerifyOob(node_i, nresult)
2701

    
2702
      if nimg.vm_capable:
2703
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2704
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2705
                             all_drbd_map)
2706

    
2707
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2708
        self._UpdateNodeInstances(node_i, nresult, nimg)
2709
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2710
        self._UpdateNodeOS(node_i, nresult, nimg)
2711

    
2712
        if not nimg.os_fail:
2713
          if refos_img is None:
2714
            refos_img = nimg
2715
          self._VerifyNodeOS(node_i, nimg, refos_img)
2716
        self._VerifyNodeBridges(node_i, nresult, bridges)
2717

    
2718
        # Check whether all running instances are primary for the node. (This
2719
        # can no longer be done from _VerifyInstance below, since some of the
2720
        # wrong instances could be from other node groups.)
2721
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2722

    
2723
        for inst in non_primary_inst:
2724
          test = inst in self.all_inst_info
2725
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2726
                   "instance should not run on node %s", node_i.name)
2727
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2728
                   "node is running unknown instance %s", inst)
2729

    
2730
    for node, result in extra_lv_nvinfo.items():
2731
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2732
                              node_image[node], vg_name)
2733

    
2734
    feedback_fn("* Verifying instance status")
2735
    for instance in self.my_inst_names:
2736
      if verbose:
2737
        feedback_fn("* Verifying instance %s" % instance)
2738
      inst_config = self.my_inst_info[instance]
2739
      self._VerifyInstance(instance, inst_config, node_image,
2740
                           instdisk[instance])
2741
      inst_nodes_offline = []
2742

    
2743
      pnode = inst_config.primary_node
2744
      pnode_img = node_image[pnode]
2745
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2746
               self.ENODERPC, pnode, "instance %s, connection to"
2747
               " primary node failed", instance)
2748

    
2749
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2750
               self.EINSTANCEBADNODE, instance,
2751
               "instance is marked as running and lives on offline node %s",
2752
               inst_config.primary_node)
2753

    
2754
      # If the instance is non-redundant we cannot survive losing its primary
2755
      # node, so we are not N+1 compliant. On the other hand we have no disk
2756
      # templates with more than one secondary so that situation is not well
2757
      # supported either.
2758
      # FIXME: does not support file-backed instances
2759
      if not inst_config.secondary_nodes:
2760
        i_non_redundant.append(instance)
2761

    
2762
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2763
               instance, "instance has multiple secondary nodes: %s",
2764
               utils.CommaJoin(inst_config.secondary_nodes),
2765
               code=self.ETYPE_WARNING)
2766

    
2767
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2768
        pnode = inst_config.primary_node
2769
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2770
        instance_groups = {}
2771

    
2772
        for node in instance_nodes:
2773
          instance_groups.setdefault(self.all_node_info[node].group,
2774
                                     []).append(node)
2775

    
2776
        pretty_list = [
2777
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2778
          # Sort so that we always list the primary node first.
2779
          for group, nodes in sorted(instance_groups.items(),
2780
                                     key=lambda (_, nodes): pnode in nodes,
2781
                                     reverse=True)]
2782

    
2783
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2784
                      instance, "instance has primary and secondary nodes in"
2785
                      " different groups: %s", utils.CommaJoin(pretty_list),
2786
                      code=self.ETYPE_WARNING)
2787

    
2788
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2789
        i_non_a_balanced.append(instance)
2790

    
2791
      for snode in inst_config.secondary_nodes:
2792
        s_img = node_image[snode]
2793
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2794
                 "instance %s, connection to secondary node failed", instance)
2795

    
2796
        if s_img.offline:
2797
          inst_nodes_offline.append(snode)
2798

    
2799
      # warn that the instance lives on offline nodes
2800
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2801
               "instance has offline secondary node(s) %s",
2802
               utils.CommaJoin(inst_nodes_offline))
2803
      # ... or ghost/non-vm_capable nodes
2804
      for node in inst_config.all_nodes:
2805
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2806
                 "instance lives on ghost node %s", node)
2807
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2808
                 instance, "instance lives on non-vm_capable node %s", node)
2809

    
2810
    feedback_fn("* Verifying orphan volumes")
2811
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2812

    
2813
    # We will get spurious "unknown volume" warnings if any node of this group
2814
    # is secondary for an instance whose primary is in another group. To avoid
2815
    # them, we find these instances and add their volumes to node_vol_should.
2816
    for inst in self.all_inst_info.values():
2817
      for secondary in inst.secondary_nodes:
2818
        if (secondary in self.my_node_info
2819
            and inst.name not in self.my_inst_info):
2820
          inst.MapLVsByNode(node_vol_should)
2821
          break
2822

    
2823
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2824

    
2825
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2826
      feedback_fn("* Verifying N+1 Memory redundancy")
2827
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2828

    
2829
    feedback_fn("* Other Notes")
2830
    if i_non_redundant:
2831
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2832
                  % len(i_non_redundant))
2833

    
2834
    if i_non_a_balanced:
2835
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2836
                  % len(i_non_a_balanced))
2837

    
2838
    if n_offline:
2839
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2840

    
2841
    if n_drained:
2842
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2843

    
2844
    return not self.bad
2845

    
2846
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2847
    """Analyze the post-hooks' result
2848

2849
    This method analyses the hook result, handles it, and sends some
2850
    nicely-formatted feedback back to the user.
2851

2852
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2853
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2854
    @param hooks_results: the results of the multi-node hooks rpc call
2855
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = 0

    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.glm.list_owned(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in
               self.glm.list_owned(locking.LEVEL_INSTANCE)
             for group_uuid in
               self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
    if owned_instances != wanted_instances:
      raise errors.OpPrereqError("Instances in node group %s changed since"
                                 " locks were acquired, wanted %s, have %s;"
                                 " retry the operation" %
                                 (self.op.group_name,
                                  utils.CommaJoin(wanted_instances),
                                  utils.CommaJoin(owned_instances)),
                                 errors.ECODE_STATE)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
      if not owned_groups.issuperset(inst_groups):
        raise errors.OpPrereqError("Instance %s's node groups changed since"
                                   " locks were acquired, current groups are"
                                   " are '%s', owning groups '%s'; retry the"
                                   " operation" %
                                   (instance_name,
                                    utils.CommaJoin(inst_groups),
                                    utils.CommaJoin(owned_groups)),
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
                                        for inst in self.instances.values()
                                        if inst.admin_up])

    if nv_dict:
      nodes = utils.NiceSort(set(self.glm.list_owned(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
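          # the LV is known but not online, so the owning instance needs its
          # disks re-activated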
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(key)

    return (res_nodes, list(res_instances), res_missing)


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
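        # the reported size is in bytes; convert it to MiB, the unit used for
        # disk sizes in the configuration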
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

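    # update the configuration and the known_hosts file; the master role is
    # re-enabled in the finally clause even if one of these steps fails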
    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.glm.list_owned(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

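    # the master IP is only brought back up once the new configuration
    # (including the new netdev) has been written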
    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Filenames must be unique
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (online_nodes, files_all_opt),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

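    # sleep for the estimated remaining time, but never more than a minute
    # between polls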
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

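  # recurse into child devices (e.g. the LVs backing a DRBD8 device); they are
  # checked without the ldisk test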
  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

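          # stagger power-on commands by the configured delay, skipping the
          # pause after the last node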
          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))

class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

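      # an OS is considered valid only if its primary entry is valid on every
      # node; variants, parameters and API versions are reduced to the values
      # all nodes agree on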
      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4220
      if node.name in instance.all_nodes:
4221
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4222
                                   " please remove first" % instance_name,
4223
                                   errors.ECODE_INVAL)
4224
    self.op.node_name = node.name
4225
    self.node = node
4226

    
4227
  def Exec(self, feedback_fn):
4228
    """Removes the node from the cluster.
4229

4230
    """
4231
    node = self.node
4232
    logging.info("Stopping the node daemon and removing configs from node %s",
4233
                 node.name)
4234

    
4235
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4236

    
4237
    # Promote nodes to master candidate as needed
4238
    _AdjustCandidatePool(self, exceptions=[node.name])
4239
    self.context.RemoveNode(node.name)
4240

    
4241
    # Run post hooks on the node before it's removed
4242
    _RunPostHook(self, node.name)
4243

    
4244
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4245
    msg = result.fail_msg
4246
    if msg:
4247
      self.LogWarning("Errors encountered on the remote node while leaving"
4248
                      " the cluster: %s", msg)
4249

    
4250
    # Remove node from our /etc/hosts
4251
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4252
      master_node = self.cfg.GetMasterNode()
4253
      result = self.rpc.call_etc_hosts_modify(master_node,
4254
                                              constants.ETC_HOSTS_REMOVE,
4255
                                              node.name, None)
4256
      result.Raise("Can't update hosts file with new host data")
4257
      _RedistributeAncillaryFiles(self)
4258

    
4259

    
4260
class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in
                lu.glm.list_owned(locking.LEVEL_INSTANCE)
              for group_uuid in
                lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable-msg=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.glm.list_owned(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.glm.list_owned(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst_groups = lu.cfg.GetInstanceNodeGroups(instance_name)
      if not owned_groups.issuperset(inst_groups):
        raise errors.OpPrereqError("Instance %s's node groups changed since"
                                   " locks were acquired, current groups"
                                   " are '%s', owning groups '%s'; retry the"
                                   " operation" %
                                   (instance_name,
                                    utils.CommaJoin(inst_groups),
                                    utils.CommaJoin(owned_groups)),
                                   errors.ECODE_STATE)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, False)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        locked_i = self.glm.list_owned(locking.LEVEL_INSTANCE)
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5480
                           ignore_size=False):
5481
  """Prepare the block devices for an instance.
5482

5483
  This sets up the block devices on all nodes.
5484

5485
  @type lu: L{LogicalUnit}
5486
  @param lu: the logical unit on whose behalf we execute
5487
  @type instance: L{objects.Instance}
5488
  @param instance: the instance for whose disks we assemble
5489
  @type disks: list of L{objects.Disk} or None
5490
  @param disks: which disks to assemble (or all, if None)
5491
  @type ignore_secondaries: boolean
5492
  @param ignore_secondaries: if true, errors on secondary nodes
5493
      won't result in an error return from the function
5494
  @type ignore_size: boolean
5495
  @param ignore_size: if true, the current known size of the disk
5496
      will not be used during the disk activation, useful for cases
5497
      when the size is wrong
5498
  @return: False if the operation failed, otherwise a list of
5499
      (host, instance_visible_name, node_visible_name)
5500
      with the mapping from node devices to instance devices
5501

5502
  """
5503
  device_info = []
5504
  disks_ok = True
5505
  iname = instance.name
5506
  disks = _ExpandCheckDisks(instance, disks)
5507

    
5508
  # With the two passes mechanism we try to reduce the window of
5509
  # opportunity for the race condition of switching DRBD to primary
5510
  # before handshaking occured, but we do not eliminate it
5511

    
5512
  # The proper fix would be to wait (with some limits) until the
5513
  # connection has been made and drbd transitions from WFConnection
5514
  # into any other network-connected state (Connected, SyncTarget,
5515
  # SyncSource, etc.)
5516

    
5517
  # 1st pass, assemble on all nodes in secondary mode
5518
  for idx, inst_disk in enumerate(disks):
5519
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5520
      if ignore_size:
5521
        node_disk = node_disk.Copy()
5522
        node_disk.UnsetSize()
5523
      lu.cfg.SetDiskID(node_disk, node)
5524
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5525
      msg = result.fail_msg
5526
      if msg:
5527
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5528
                           " (is_primary=False, pass=1): %s",
5529
                           inst_disk.iv_name, node, msg)
5530
        if not ignore_secondaries:
5531
          disks_ok = False
5532

    
5533
  # FIXME: race condition on drbd migration to primary
5534

    
5535
  # 2nd pass, do only the primary node
5536
  for idx, inst_disk in enumerate(disks):
5537
    dev_path = None
5538

    
5539
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5540
      if node != instance.primary_node:
5541
        continue
5542
      if ignore_size:
5543
        node_disk = node_disk.Copy()
5544
        node_disk.UnsetSize()
5545
      lu.cfg.SetDiskID(node_disk, node)
5546
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5547
      msg = result.fail_msg
5548
      if msg:
5549
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5550
                           " (is_primary=True, pass=2): %s",
5551
                           inst_disk.iv_name, node, msg)
5552
        disks_ok = False
5553
      else:
5554
        dev_path = result.payload
5555

    
5556
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5557

    
5558
  # leave the disks configured for the primary node
5559
  # this is a workaround that would be fixed better by
5560
  # improving the logical/physical id handling
5561
  for disk in disks:
5562
    lu.cfg.SetDiskID(disk, instance.primary_node)
5563

    
5564
  return disks_ok, device_info
5565

    
5566

    
5567
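# Illustrative caller-side use of the (disks_ok, device_info) return value of
# _AssembleInstanceDisks (a sketch only, not called anywhere as-is):
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   for node, iv_name, dev_path in device_info:
#     logging.debug("disk %s of %s is %s on node %s",
#                   iv_name, instance.name, dev_path, node)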
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
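    # force=None means the caller (e.g. reinstall or rename) does not expose
    # a --force option, so the hint below would not be helpful there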
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is not running before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
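        # a shutdown failure only counts against the overall result if it
        # happened on the primary node (and ignore_primary is not set) or on
        # an online secondary; failures on offline secondaries are expected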
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the given VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
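  # req_sizes maps a VG name to the required MiB, e.g. {"xenvg": 10240};
  # an empty dict makes this check a no-op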
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

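    # unless told otherwise, record the desired "up" state in the
    # configuration before actually starting the instance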
    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams,
                                            self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

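    # soft and hard reboots of a running instance are delegated to the
    # hypervisor; a full reboot (or a stopped instance) falls through to an
    # explicit shutdown/start cycle below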
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # normalise the disk list
    self.op.disks = sorted(frozenset(self.op.disks))

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    to_skip = []
    mods = [] # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes and minors
          assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6 # otherwise disk internals
                                           # have changed
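          # DRBD8 logical_id layout: (nodeA, nodeB, port, minorA, minorB,
          # secret); only the nodes and minors change, port and secret are kept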
          (_, _, old_port, _, _, old_secret) = disk.logical_id
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                    new_minors[0], new_minors[1], old_secret)
          assert len(disk.logical_id) == len(new_id)
          mods.append((idx, new_id))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id in mods:
      instance.disks[idx].logical_id = new_id

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (hostname.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

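    # for file-based disk templates the data lives in a directory named after
    # the instance, so renaming the instance also means renaming that
    # directory on the primary node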
    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

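    # all disk data has been copied; record the target node as the new
    # primary in the configuration before cleaning up the source node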
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare jobs for migration instances
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
      ]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

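    # the only node lock held at this point should be the one for the node
    # we are evacuating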
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)


class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we are cleaning up after a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout to use for the
                          shutdown

  """
  def __init__(self, lu, instance_name, cleanup=False,
               failover=False, fallback=False,
               ignore_consistency=False,
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance

    if (not self.cleanup and not instance.admin_up and not self.failover and
        self.fallback):
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
                      " to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

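    # externally mirrored templates can go to an arbitrary node (chosen
    # directly or via the iallocator); internally mirrored ones can only be
    # moved to their current secondary node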
    if instance.disk_template in constants.DTS_EXT_MIRROR:
6968
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6969

    
6970
      if self.lu.op.iallocator:
6971
        self._RunAllocator()
6972
      else:
6973
        # We set set self.target_node as it is required by
6974
        # BuildHooksEnv
6975
        self.target_node = self.lu.op.target_node
6976

    
6977
      # self.target_node is already populated, either directly or by the
6978
      # iallocator run
6979
      target_node = self.target_node
6980
      if self.target_node == instance.primary_node:
6981
        raise errors.OpPrereqError("Cannot migrate instance %s"
6982
                                   " to its primary (%s)" %
6983
                                   (instance.name, instance.primary_node))
6984

    
6985
      if len(self.lu.tasklets) == 1:
6986
        # It is safe to release locks only when we're the only tasklet
6987
        # in the LU
6988
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6989
                      keep=[instance.primary_node, self.target_node])
6990

    
6991
    else:
6992
      secondary_nodes = instance.secondary_nodes
6993
      if not secondary_nodes:
6994
        raise errors.ConfigurationError("No secondary node but using"
6995
                                        " %s disk template" %
6996
                                        instance.disk_template)
6997
      target_node = secondary_nodes[0]
6998
      if self.lu.op.iallocator or (self.lu.op.target_node and
6999
                                   self.lu.op.target_node != target_node):
7000
        if self.failover:
7001
          text = "failed over"
7002
        else:
7003
          text = "migrated"
7004
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7005
                                   " be %s to arbitrary nodes"
7006
                                   " (neither an iallocator nor a target"
7007
                                   " node can be passed)" %
7008
                                   (instance.disk_template, text),
7009
                                   errors.ECODE_INVAL)
7010

    
7011
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7012

    
7013
    # check memory requirements on the secondary node
7014
    if not self.failover or instance.admin_up:
7015
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7016
                           instance.name, i_be[constants.BE_MEMORY],
7017
                           instance.hypervisor)
7018
    else:
7019
      self.lu.LogInfo("Not checking memory on the secondary node as"
7020
                      " instance will not be started")
7021

    
7022
    # check bridge existance
7023
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7024

    
7025
    if not self.cleanup:
7026
      _CheckNodeNotDrained(self.lu, target_node)
7027
      if not self.failover:
7028
        result = self.rpc.call_instance_migratable(instance.primary_node,
7029
                                                   instance)
7030
        if result.fail_msg and self.fallback:
7031
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7032
                          " failover")
7033
          self.failover = True
7034
        else:
7035
          result.Raise("Can't migrate, please use failover",
7036
                       prereq=True, ecode=errors.ECODE_STATE)
7037

    
7038
    assert not (self.failover and self.cleanup)
7039

    
7040
    if not self.failover:
7041
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7042
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7043
                                   " parameters are accepted",
7044
                                   errors.ECODE_INVAL)
7045
      if self.lu.op.live is not None:
7046
        if self.lu.op.live:
7047
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7048
        else:
7049
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7050
        # reset the 'live' parameter to None so that repeated
7051
        # invocations of CheckPrereq do not raise an exception
7052
        self.lu.op.live = None
7053
      elif self.lu.op.mode is None:
7054
        # read the default value from the hypervisor
7055
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7056
                                                skip_globals=False)
7057
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7058

    
7059
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7060
    else:
7061
      # Failover is never live
7062
      self.live = False
7063

    
7064
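  # Illustrative note (not part of the original module): CheckPrereq above
  # folds the legacy "live" flag into the newer "mode" parameter roughly as
  # follows, assuming the usual constant values:
  #
  #   op.live = True                  -> op.mode = HT_MIGRATION_LIVE
  #   op.live = False                 -> op.mode = HT_MIGRATION_NONLIVE
  #   op.live is None, op.mode None   -> op.mode taken from the hypervisor's
  #                                      HV_MIGRATION_MODE default
  #
  # self.live ends up True only when the resolved mode is HT_MIGRATION_LIVE;
  # failovers always run with self.live = False.
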
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.instance_name,
                     # TODO See why hail breaks with a single node below
                     relocate_from=[self.instance.primary_node,
                                    self.instance.primary_node],
                     )

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.lu.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.instance_name, self.lu.op.iallocator,
                 utils.CommaJoin(ial.result))

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks on node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to clean up after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore errors here, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_up:
      self.feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, dev.iv_name, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % dev.iv_name)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, instance, None, None,
                                            False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has the
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


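# Illustrative example (not part of the original module): a call like
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# returns one name per extension, each prefixed with a freshly generated
# unique ID, e.g. (UUIDs invented for the example):
#   ["d2a49f27-....disk0", "8c31be50-....disk1"]

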
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


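# Illustrative sketch (not part of the original module) of the disk tree
# built by _GenerateDRBD8Branch for a single 10 GiB disk: the returned
# LD_DRBD8 device has size 10240 and two LD_LV children, a 10240 MiB
# "_data" volume and a fixed 128 MiB "_meta" volume, with the logical_id
# carrying (primary, secondary, port, p_minor, s_minor, shared_secret).

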
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get(constants.IDISK_VG, vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk[constants.IDISK_ADOPT]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


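# Illustrative example (not part of the original module): for an instance
# named "web1.example.com", _GetInstanceInfoText returns
# "originstname+web1.example.com", which is attached to the instance's
# block devices as their metadata (an LVM tag for LV-based devices).

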
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time


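# Worked example (not part of the original module): if 256 MiB out of
# 1024 MiB have been written in 64 seconds, _CalcEta(64, 256, 1024)
# computes avg_time = 64 / 256.0 = 0.25 s/MiB and returns
# (1024 - 256) * 0.25 = 192.0 seconds remaining.

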
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disk %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disk %d failed",
                     instance.name, idx)


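# Illustrative example (not part of the original module): with a 100 GiB
# (102400 MiB) disk and assuming, for the sake of the example, that
# MIN_WIPE_CHUNK_PERCENT is 10 and MAX_WIPE_CHUNK is 1024 (the actual
# values live in constants.py), the per-request chunk in _WipeDisks above
# would be
#   min(1024, 102400 / 100.0 * 10) = min(1024, 10240.0) = 1024 MiB,
# so one call_blockdev_wipe RPC is issued per 1024 MiB until the whole
# device has been overwritten.

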
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      # accumulate per volume group, keyed by the disk's VG name
      vg = disk[constants.IDISK_VG]
      vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


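# Illustrative example (not part of the original module): for
#   disks = [{"vg": "xenvg", "size": 10240}, {"vg": "xenvg", "size": 2048}]
# (using the literal dictionary keys assumed to sit behind IDISK_VG and
# IDISK_SIZE), the DT_DRBD8 entry of the mapping above is
#   {"xenvg": 12544}, i.e. 10240 + 128 + 2048 + 128,
# since each disk contributes its size plus 128 MiB of DRBD metadata to
# its volume group.

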
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


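# Worked example (not part of the original module): for two disks of
# 10240 MiB each, _ComputeDiskSize returns 20480 for DT_PLAIN and
# (10240 + 128) * 2 = 20736 for DT_DRBD8 (the extra 128 MiB per disk pays
# for the DRBD metadata volume); file-based and diskless templates need no
# space in the volume group, hence None or 0.

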
def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in vm_nodes]


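# Illustrative example (not part of the original module): if the cluster
# has nodes "node1" and "node2" and only "node2" is marked as not
# vm_capable, then _FilterVmNodes(lu, ["node1", "node2"]) returns
# ["node1"], since the helper drops every name found in
# GetNonVmCapableNodeList().

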
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks: parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

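  # In the parameter merge below the opcode always wins: a value from the
  # export file is only adopted when the corresponding opcode field is
  # unset.  Illustrative example (values invented): if the export's
  # backend section records vcpus=4 but the opcode already carries
  # beparams={"vcpus": 2}, the instance is created with 2 VCPUs.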
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

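  # The method below drops every hv/be/nic/os parameter whose value is
  # identical to the cluster default, apparently so that such instances
  # keep following future changes of the cluster-wide defaults instead of
  # freezing the current value (interpretation, not from the original
  # source).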
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

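  # Path composition in the method below, with illustrative values only:
  # a cluster file storage dir of /srv/ganeti/file-storage, an opcode
  # file_storage_dir of "web" and an instance named inst1.example.com
  # would yield /srv/ganeti/file-storage/web/inst1.example.com.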
  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable-msg=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

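  # CheckPrereq below is the workhorse of instance creation: it merges
  # export data (for imports), validates hypervisor/backend/OS and NIC
  # parameters, builds the final disk list, optionally runs the
  # iallocator and checks the chosen nodes, finishing by storing the
  # selected node names in dry_run_result (summary comment, not from the
  # original source).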
  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

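  # Exec below performs the actual creation: generate the disk objects,
  # create (or adopt and rename) the block devices, register the instance
  # in the configuration, optionally wipe and wait for the disks to sync,
  # run the OS create/import scripts and finally start the instance if
  # requested (summary comment, not from the original source).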
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip (tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj,
                                            None, None, False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


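# Minimal usage sketch for the helper below (this mirrors the call in
# LUInstanceConsole.Exec above; it is not an additional API):
#   console_params = _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
# The returned dict is simply the serialized form of the hypervisor's
# console object (console.ToDict()).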
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
    if owned_groups:
      groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
      if owned_groups != groups:
        raise errors.OpExecError("Node groups used by instance '%s' changed"
                                 " since lock was acquired, current list is %r,"
                                 " used to be '%s'" %
                                 (self.op.instance_name,
                                  utils.CommaJoin(groups),
                                  utils.CommaJoin(owned_groups)))

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

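  # Argument combinations accepted by CheckArguments below (summary of the
  # checks, not from the original source):
  #   constants.REPLACE_DISK_CHG -> exactly one of remote_node / iallocator
  #   any other replace mode     -> neither remote_node nor iallocator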
  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=list(relocate_from))

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
      assert set(owned_locks) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_locks, self.node_secondary_ip.keys()))

      owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
      assert list(owned_locks) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    if __debug__:
      # Verify owned locks
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_locks) or
              (not self.early_release and not (set(owned_locks) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_locks, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

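  # _CreateNewStorage below builds, for every disk being replaced, a fresh
  # pair of LVs (".diskN_data" and a 128 MB ".diskN_meta") with newly
  # generated unique names on the given node; the old LVs are remembered in
  # iv_names so they can later be detached and removed (descriptive note,
  # not from the original source).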
  def _CreateNewStorage(self, node_name):
9546
    """Create new storage on the primary or secondary node.
9547

9548
    This is only used for same-node replaces, not for changing the
9549
    secondary node, hence we don't want to modify the existing disk.
9550

9551
    """
9552
    iv_names = {}
9553

    
9554
    for idx, dev in enumerate(self.instance.disks):
9555
      if idx not in self.disks:
9556
        continue
9557

    
9558
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9559

    
9560
      self.cfg.SetDiskID(dev, node_name)
9561

    
9562
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9563
      names = _GenerateUniqueNames(self.lu, lv_names)
9564

    
9565
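      # The new data LV keeps the size of the old disk; the second LV is the
      # small (128 MiB) DRBD metadata volume.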
      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]))
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vg_meta, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6
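    # Steps 5 and 6 below are "remove old storage" and "sync devices"; which
    # of the two runs first depends on self.early_release.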

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

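    # At this point every selected disk is backed by the freshly created LVs;
    # the old LVs only survive under their *_replaced-<timestamp> names until
    # _RemoveOldStorage() runs (as step 5 or 6, depending on early_release).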
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

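    # One DRBD minor was reserved on the new node for every instance disk; the
    # error paths below must give them back via ReleaseDRBDMinors().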
    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.instance.primary_node,
                           self.target_node,
                           self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }
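    # Repairing is a node-local operation, so the node lock is all that is
    # acquired here; instances are only inspected through the configuration in
    # CheckPrereq.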

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    self.lock_nodes = set([self.op.node_name]) | group_nodes
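    # The evacuated node is locked together with either its whole node group
    # (iallocator case) or just the explicitly given remote node.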

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES

    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
      inst_fn = _GetNodeInstances

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups optimistically, needs verification once nodes have
      # been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.glm.list_owned(locking.LEVEL_INSTANCE)
    owned_nodes = self.glm.list_owned(locking.LEVEL_NODE)
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)

    assert owned_nodes == self.lock_nodes

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self.op.mode,
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

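    # Each inner list becomes one job of its own; e.g. (illustrative) moving
    # two secondary instances to node "node3" results in two single-opcode
    # jobs, each an OpInstanceReplaceDisks with mode=constants.REPLACE_DISK_CHG
    # and remote_node="node3".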
    return ResultWithJobs(jobs)


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
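    # Opcodes without an early_release slot are passed through unchanged; the
    # assert merely documents that OpInstanceReplaceDisks always has one.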

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result
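  # "moved" holds (instance, target group, target nodes) tuples, "failed"
  # holds (instance, reason) tuples and "jobs" is a list of job definitions,
  # each one being a list of serialized opcodes.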

  if failed:
    lu.LogWarning("Unable to evacuate instances %s",
                  utils.CommaJoin("%s (%s)" % (name, reason)
                                  for (name, reason) in failed))

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]


class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
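    # DISK and AMOUNT are exported on top of the standard per-instance hook
    # variables built above (the hooks runner is expected to add its usual
    # GANETI_ prefix when exporting them to the hook scripts).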
10216
    return env
10217

    
10218
  def BuildHooksNodes(self):
10219
    """Build hooks nodes.
10220

10221
    """
10222
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10223
    return (nl, nl)
10224

    
10225
  def CheckPrereq(self):
10226
    """Check prerequisites.
10227

10228
    This checks that the instance is in the cluster.
10229

10230
    """
10231
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10232
    assert instance is not None, \
10233
      "Cannot retrieve locked instance %s" % self.op.instance_name
10234
    nodenames = list(instance.all_nodes)
10235
    for node in nodenames:
10236
      _CheckNodeOnline(self, node)
10237

    
10238
    self.instance = instance
10239

    
10240
    if instance.disk_template not in constants.DTS_GROWABLE:
10241
      raise errors.OpPrereqError("Instance's disk layout does not support"
10242
                                 " growing", errors.ECODE_INVAL)
10243

    
10244
    self.disk = instance.FindDisk(self.op.disk)
10245

    
10246
    if instance.disk_template not in (constants.DT_FILE,
10247
                                      constants.DT_SHARED_FILE):
10248
      # TODO: check the free disk space for file, when that feature will be
10249
      # supported
10250
      _CheckNodesFreeDiskPerVG(self, nodenames,
10251
                               self.disk.ComputeGrowth(self.op.amount))
10252

    
10253
  def Exec(self, feedback_fn):
10254
    """Execute disk grow.
10255

10256
    """
10257
    instance = self.instance
10258
    disk = self.disk
10259

    
10260
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10261
    if not disks_ok:
10262
      raise errors.OpExecError("Cannot activate block device to grow")
10263

    
10264
    # First run all grow ops in dry-run mode
10265
    for node in instance.all_nodes:
10266
      self.cfg.SetDiskID(disk, node)
10267
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10268
      result.Raise("Grow request failed to node %s" % node)
10269

    
10270
    # We know that (as far as we can test) operations across different
10271
    # nodes will succeed, time to run it for real
10272
    for node in instance.all_nodes:
10273
      self.cfg.SetDiskID(disk, node)
10274
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10275
      result.Raise("Grow request failed to node %s" % node)
10276

    
10277
      # TODO: Rewrite code to work properly
10278
      # DRBD goes into sync mode for a short amount of time after executing the
10279
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10280
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10281
      # time is a work-around.
10282
      time.sleep(5)
10283

    
10284
    disk.RecordGrow(self.op.amount)
10285
    self.cfg.Update(instance, feedback_fn)
10286
    if self.op.wait_for_sync:
10287
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10288
      if disk_abort:
10289
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10290
                             " status; please check the instance")
10291
      if not instance.admin_up:
10292
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10293
    elif not instance.admin_up:
10294
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10295
                           " not supposed to be running because no wait for"
10296
                           " sync mode was requested")
10297

    
10298

    
10299
class LUInstanceQueryData(NoHooksLU):
10300
  """Query runtime instance data.
10301

10302
  """
10303
  REQ_BGL = False
10304

    
10305
  def ExpandNames(self):
10306
    self.needed_locks = {}
10307

    
10308
    # Use locking if requested or when non-static information is wanted
10309
    if not (self.op.static or self.op.use_locking):
10310
      self.LogWarning("Non-static data requested, locks need to be acquired")
10311
      self.op.use_locking = True
10312

    
10313
    if self.op.instances or not self.op.use_locking:
10314
      # Expand instance names right here
10315
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10316
    else:
10317
      # Will use acquired locks
10318
      self.wanted_names = None
10319

    
10320
    if self.op.use_locking:
10321
      self.share_locks = _ShareAll()
10322

    
10323
      if self.wanted_names is None:
10324
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10325
      else:
10326
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10327

    
10328
      self.needed_locks[locking.LEVEL_NODE] = []
10329
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10330

    
10331
  def DeclareLocks(self, level):
10332
    if self.op.use_locking and level == locking.LEVEL_NODE:
10333
      self._LockInstancesNodes()
10334

    
10335
  def CheckPrereq(self):
10336
    """Check prerequisites.
10337

10338
    This only checks the optional instance list against the existing names.
10339

10340
    """
10341
    if self.wanted_names is None:
10342
      assert self.op.use_locking, "Locking was not used"
10343
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
10344

    
10345
    self.wanted_instances = \
10346
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
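    # GetMultiInstanceInfo returns (name, instance object) pairs; only the
    # objects are kept here.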

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device.

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data."""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"

      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
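    # self.op.disks is a list of (operation, parameters) pairs, e.g.
    # (illustrative) [(constants.DDM_ADD, {constants.IDISK_SIZE: 1024})] adds
    # a 1 GiB disk, while [(0, {constants.IDISK_MODE: constants.DISK_RDONLY})]
    # modifies disk 0.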
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get(constants.IDISK_SIZE, None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict[constants.IDISK_SIZE] = size
      else:
        # modification of disk
        if constants.IDISK_SIZE in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation
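    # self.op.nics follows the same (operation, parameters) structure, e.g.
    # (illustrative) [(constants.DDM_ADD, {constants.INIC_IP: "192.0.2.10"})]
    # adds a NIC and [(1, {constants.INIC_LINK: "br1"})] modifies NIC 1.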
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get(constants.INIC_IP, None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict[constants.INIC_IP] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get("bridge", None)
      nic_link = nic_dict.get(constants.INIC_LINK, None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict["bridge"] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict[constants.INIC_LINK] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
        if nic_mac is None:
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args["memory"] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args["nics"] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if constants.INIC_IP in this_nic_override:
          ip = this_nic_override[constants.INIC_IP]
        else:
          ip = nic.ip
        if constants.INIC_MAC in this_nic_override:
          mac = this_nic_override[constants.INIC_MAC]
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args["nics"][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceDown(self, instance, "cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
    be_old = cluster.FillBE(instance)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload["memory"])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
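        # The instance only needs the difference between its new memory size
        # and what it currently uses; anything beyond the node's reported free
        # memory is considered missing.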
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10780
                    pninfo.payload["memory_free"])
10781
        if miss_mem > 0:
10782
          raise errors.OpPrereqError("This change will prevent the instance"
10783
                                     " from starting, due to %d MB of memory"
10784
                                     " missing on its primary node" % miss_mem,
10785
                                     errors.ECODE_NORES)
10786

    
10787
      if be_new[constants.BE_AUTO_BALANCE]:
10788
        for node, nres in nodeinfo.items():
10789
          if node not in instance.secondary_nodes:
10790
            continue
10791
          nres.Raise("Can't get info from secondary node %s" % node,
10792
                     prereq=True, ecode=errors.ECODE_STATE)
10793
          if not isinstance(nres.payload.get("memory_free", None), int):
10794
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10795
                                       " memory information" % node,
10796
                                       errors.ECODE_STATE)
10797
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10798
            raise errors.OpPrereqError("This change will prevent the instance"
10799
                                       " from failover to its secondary node"
10800
                                       " %s, due to not enough memory" % node,
10801
                                       errors.ECODE_STATE)
10802

    
10803
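    # NIC (and, further below, disk) changes are given as a list of
    # (op, params) pairs, where op is constants.DDM_ADD, constants.DDM_REMOVE
    # or the integer index of an existing device.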
    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if "bridge" in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError("Cannot set the nic ip to None"
                                     " on a routed nic", errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError("Cannot set the nic mac to None",
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)

    return

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
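    # The loops below rely on the layout produced by _GenerateDiskTemplate for
    # DRBD8 disks: the DRBD device is the top of the tree, with the data LV as
    # children[0] and the metadata LV as children[1].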
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

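    # children[0] of a DRBD8 disk is its data LV; that is what the plain
    # template keeps, while the DRBD device and the metadata LV are removed
    # below.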
    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
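    # 'result' collects (parameter, new value) pairs describing every change
    # that was applied; it is returned at the end and shown to the user.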
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                        constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

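    # Disk template conversions are dispatched through the _DISK_CONVERSIONS
    # table defined below; on any failure the reserved DRBD minors are
    # released so a retry starts from a clean state.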
    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

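  # Maps (current disk template, requested disk template) to the conversion
  # method; only plain<->drbd conversions are supported.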
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

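      # LUBackupExport later re-checks these values against the same cluster
      # domain secret: the HMAC on the key name and the signature on the CA
      # prove they originate from this cluster.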
      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down first")

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

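      # In remote mode self.op.target_node is not a node name but a list with
      # one signed (host, port, magic) entry per instance disk; each entry is
      # verified against the cluster domain secret below.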
      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal;
    # if we proceed, the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

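    # The actual export: snapshot the disks, optionally start the instance
    # again while the snapshots are transferred, and always clean the
    # snapshots up afterwards.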
    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance,
                                                None, None, False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    # FIXME: Depends on side-effects of modifying the result of
    # C{cfg.GetAllNodesInfo}

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and become split as
      a consequence of this change, and a list of instances that were
      previously split and this change does not fix.

    """
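    # Illustrative example: a DRBD instance whose primary node stays in group
    # A while its secondary node is moved to group B ends up in the first
    # (newly split) list; one that already spanned A and B and still does ends
    # up in the second list.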
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))


class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raise errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

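    # Run the hooks on the master plus every node of the renamed group; the
    # master was popped from all_nodes above so it is only listed once.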
    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.glm.list_owned(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated
      assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
    if owned_instances != wanted_instances:
      raise errors.OpPrereqError("Instances in node group to be evacuated (%s)"
                                 " changed since locks were acquired, wanted"
                                 " %s, have %s; retry the operation" %
                                 (self.group_uuid,
                                  utils.CommaJoin(wanted_instances),
                                  utils.CommaJoin(owned_instances)),
                                 errors.ECODE_STATE)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
      if not owned_groups.issuperset(inst_groups):
        raise errors.OpPrereqError("Instance %s's node groups changed since"
                                   " locks were acquired, current groups"
                                   " are '%s', owning groups '%s'; retry the"
                                   " operation" %
                                   (instance_name,
                                    utils.CommaJoin(inst_groups),
                                    utils.CommaJoin(owned_groups)),
                                   errors.ECODE_STATE)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.
12208

12209
    """
12210
    mn = self.cfg.GetMasterNode()
12211

    
12212
    assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
12213

    
12214
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12215

    
12216
    return (run_nodes, run_nodes)
12217

    
12218
  def Exec(self, feedback_fn):
12219
    instances = list(self.glm.list_owned(locking.LEVEL_INSTANCE))
12220

    
12221
    assert self.group_uuid not in self.target_uuids
12222

    
12223
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12224
                     instances=instances, target_groups=self.target_uuids)
12225

    
12226
    ial.Run(self.op.iallocator)
12227

    
12228
    if not ial.success:
12229
      raise errors.OpPrereqError("Can't compute group evacuation using"
12230
                                 " iallocator '%s': %s" %
12231
                                 (self.op.iallocator, ial.info),
12232
                                 errors.ECODE_NORES)
12233

    
12234
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12235

    
12236
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12237
                 len(jobs), self.op.group_name)
12238

    
12239
    return ResultWithJobs(jobs)
12240

    
12241

    
12242
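# Illustrative sketch only (not used by any LU): roughly the "change group"
# request that LUGroupEvacuate above ends up handing to the iallocator script.
# The authoritative field list lives in IAllocator._AddChangeGroup and
# IAllocator._MODE_DATA further down in this module; the instance and group
# names below are made-up example values.
_EXAMPLE_CHG_GROUP_REQUEST = {
  "type": constants.IALLOCATOR_MODE_CHG_GROUP,
  "instances": ["inst1.example.com", "inst2.example.com"],
  "target_groups": ["target-group-uuid"],
  }

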
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


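# Illustrative sketch only (not used by TagsLU): the CheckPrereq dispatch
# above, expressed as a lookup table.  TagsLU keeps the explicit if/elif chain
# because the node group case needs the UUID resolved in ExpandNames first;
# this hypothetical helper glosses over that detail.
def _ExampleTagTargetGetters(cfg):
  return {
    constants.TAG_CLUSTER: lambda name: cfg.GetClusterInfo(),
    constants.TAG_NODE: cfg.GetNodeInfo,
    constants.TAG_INSTANCE: cfg.GetInstanceInfo,
    }

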
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the matching (path, tag) pairs.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


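# Illustrative sketch only (not used by LUTagsSearch): the same filtering idea
# as LUTagsSearch.Exec above, written as a standalone helper over
# (path, object) pairs.  Objects only need a GetTags() method; the helper name
# is hypothetical.
def _ExampleSearchTags(pattern, tagged_objects):
  regex = re.compile(pattern)
  return [(path, tag)
          for (path, obj) in tagged_objects
          for tag in obj.GetTags()
          if regex.search(tag)]

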
class LUTagsSet(TagsLU):
  """Sets one or more tags on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tags.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tags.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


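# Illustrative sketch only (not used by LUTagsDel): the prerequisite check
# above in isolation; tags may only be deleted when every requested tag is
# currently present on the object.  Example:
#   _ExampleMissingTags(["a", "b"], set(["a"])) == frozenset(["b"])
def _ExampleMissingTags(requested, current):
  return frozenset(requested) - frozenset(current)

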
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


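# Illustrative sketch only (no such client exists in this module): what the
# test client on the other side of LUTestJqueue._NotifyUsingSocket is expected
# to do with the socket path it receives through the job log messages: connect
# within _CLIENT_CONNECT_TIMEOUT, then send a single byte to confirm the
# notification before _CLIENT_CONFIRM_TIMEOUT expires.  The function name is
# hypothetical.
def _ExampleJqueueTestClient(sockname):
  sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  try:
    sock.connect(sockname)
    sock.send("\0")
  finally:
    sock.close()

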
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all the members required by the mode's _MODE_DATA entry)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute static (config-based) node data.

    @rtype: dict
    @return: a dict mapping node names to dicts of config-derived attributes

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute dynamic (runtime) node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # assemble the dynamic node data
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable-msg=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_MEVAC:
      (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
       ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result))

    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
                     constants.IALLOCATOR_MODE_MEVAC):
      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      if self.mode == constants.IALLOCATOR_MODE_RELOC:
        assert self.relocate_from is not None
        assert self.required_nodes == 1

        request_groups = fn(self.relocate_from)
        result_groups = fn(rdict["result"])

        if result_groups != request_groups:
          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                   " differ from original groups (%s)" %
                                   (utils.CommaJoin(result_groups),
                                    utils.CommaJoin(request_groups)))
      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
        request_groups = fn(self.evac_nodes)
        for (instance_name, secnode) in self.result:
          result_groups = fn([secnode])
          if result_groups != request_groups:
            raise errors.OpExecError("Iallocator returned new secondary node"
                                     " '%s' (group '%s') for instance '%s'"
                                     " which is not in original group '%s'" %
                                     (secnode, utils.CommaJoin(result_groups),
                                      instance_name,
                                      utils.CommaJoin(request_groups)))
      else:
        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


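# Illustrative sketch only (mirrors what LUGroupEvacuate and LUTestAllocator
# already do): driving the IAllocator class by hand for a "change group"
# request.  "hail" is merely an example allocator name, and self.cfg/self.rpc
# come from a calling LU, so this is kept as a comment rather than live code.
#
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
#                    instances=["inst1.example.com"],
#                    target_groups=["some-group-uuid"])
#   ial.Run("hail")
#   if ial.success:
#     # ial.result has been validated against _NEVAC_RESULT above, i.e. it is
#     # a (moved, failed, jobs) triple as produced by the allocator script.
#     (moved, failed, jobs) = ial.result

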
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode '%s' in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
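

# Illustrative usage sketch only (hypothetical caller): the lookup above maps
# an opcode-level query resource onto its implementation class, e.g.
#
#   impl_cls = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#
# while an unknown resource name raises OpPrereqError with ECODE_INVAL.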