root / lib / cmdlib.py @ f39b695a

#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools
import operator

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht

import ganeti.masterd.instance # pylint: disable-msg=W0611


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs
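
# Example (sketch): an LU's Exec can hand follow-up work back to the job queue
# by returning a ResultWithJobs; each inner list is submitted as one job and
# the job IDs end up in the opcode result. The opcode below is a placeholder:
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=0)]]
#     return ResultWithJobs(jobs, extra_key="extra-value")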


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
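
# Example (sketch): how a concurrent LU typically combines the helpers above;
# ExpandNames requests the instance lock and marks the node level for
# recalculation, and DeclareLocks then resolves the actual node names:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()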


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.glm.list_owned(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
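
# Note (sketch): concrete query helpers subclass _QueryBase, set FIELDS to the
# field definitions understood by the query module, and implement
# ExpandNames/DeclareLocks (locking) plus _GetQueryData (data collection);
# NewStyleQuery and OldStyleQuery above then evaluate the query over that data.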


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
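
# Example: with use_default=True, VALUE_DEFAULT entries delete keys while the
# remaining entries overwrite or extend the old parameters (values below are
# illustrative only):
#
#   _GetUpdatedParams({"a": 1, "b": 2}, {"a": constants.VALUE_DEFAULT, "c": 3})
#   => {"b": 2, "c": 3}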


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
         "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  if should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.glm.list_owned(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
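
# Example (sketch): after an LU has narrowed down the nodes it really needs,
# it can drop the rest; at most one of "names" and "keep" may be given
# (self.op.node_name here is just an illustrative attribute):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#   _ReleaseLocks(self, locking.LEVEL_INSTANCE)  # release all instance locks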


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
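
# Example: for an instance "inst1" with one logical volume "xenvg/disk0" on
# node "node1", the resulting map contains {("node1", "xenvg/disk0"): "inst1"}
# (names are illustrative only).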


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable-msg=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
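
# Example: for a one-NIC, one-disk instance the returned dictionary contains,
# among others, INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_STATUS ("up"/"down"),
# INSTANCE_NIC_COUNT, INSTANCE_NIC0_MAC, INSTANCE_DISK_COUNT and
# INSTANCE_DISK0_SIZE; the hooks runner later prefixes every key with
# "GANETI_" before exporting it to the hook scripts.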


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_up,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
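
# Example: with candidate_pool_size=10, 3 current candidates and 4 that should
# exist, the new node raises the target to min(4 + 1, 10) = 5, and since
# 3 < 5 the node decides to promote itself (numbers are illustrative only).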


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data
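
# Example: the returned list holds entries such as ("cluster", "xen-pvm", {...}),
# ("os debian-image", "xen-pvm", {...}) or ("instance inst1", "kvm", {...});
# the hypervisor, OS and instance names here are illustrative only.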


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
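
# Example: with the opcode's error_codes parameter set, _Error emits
# machine-parseable lines such as "ERROR:ENODESSH:node:node1.example.com:ssh
# problem"; otherwise it emits the friendlier "ERROR: node node1.example.com:
# ssh problem" form (node name and message text are illustrative only).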
1436

    
1437

    
1438
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1439
  """Verifies the cluster config.
1440

1441
  """
1442
  REQ_BGL = True
1443

    
1444
  def _VerifyHVP(self, hvp_data):
1445
    """Verifies locally the syntax of the hypervisor parameters.
1446

1447
    """
1448
    for item, hv_name, hv_params in hvp_data:
1449
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1450
             (item, hv_name))
1451
      try:
1452
        hv_class = hypervisor.GetHypervisor(hv_name)
1453
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1454
        hv_class.CheckParameterSyntax(hv_params)
1455
      except errors.GenericError, err:
1456
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1457

    
1458
  def ExpandNames(self):
1459
    # Information can be safely retrieved as the BGL is acquired in exclusive
1460
    # mode
1461
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1462
    self.all_node_info = self.cfg.GetAllNodesInfo()
1463
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1464
    self.needed_locks = {}
1465

    
1466
  def Exec(self, feedback_fn):
1467
    """Verify integrity of cluster, performing various test on nodes.
1468

1469
    """
1470
    self.bad = False
1471
    self._feedback_fn = feedback_fn
1472

    
1473
    feedback_fn("* Verifying cluster config")
1474

    
1475
    for msg in self.cfg.VerifyConfig():
1476
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1477

    
1478
    feedback_fn("* Verifying cluster certificate files")
1479

    
1480
    for cert_filename in constants.ALL_CERT_FILES:
1481
      (errcode, msg) = _VerifyCertificate(cert_filename)
1482
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1483

    
1484
    feedback_fn("* Verifying hypervisor parameters")
1485

    
1486
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1487
                                                self.all_inst_info.values()))
1488

    
1489
    feedback_fn("* Verifying all nodes belong to an existing group")
1490

    
1491
    # We do this verification here because, should this bogus circumstance
1492
    # occur, it would never be caught by VerifyGroup, which only acts on
1493
    # nodes/instances reachable from existing node groups.
1494

    
1495
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1496
                         if node.group not in self.all_group_info)
1497

    
1498
    dangling_instances = {}
1499
    no_node_instances = []
1500

    
1501
    for inst in self.all_inst_info.values():
1502
      if inst.primary_node in dangling_nodes:
1503
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1504
      elif inst.primary_node not in self.all_node_info:
1505
        no_node_instances.append(inst.name)
1506

    
1507
    pretty_dangling = [
1508
        "%s (%s)" %
1509
        (node.name,
1510
         utils.CommaJoin(dangling_instances.get(node.name,
1511
                                                ["no instances"])))
1512
        for node in dangling_nodes]
1513

    
1514
    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1515
                  "the following nodes (and their instances) belong to a non"
1516
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1517

    
1518
    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1519
                  "the following instances have a non-existing primary-node:"
1520
                  " %s", utils.CommaJoin(no_node_instances))
1521

    
1522
    return (not self.bad, [g.name for g in self.all_group_info.values()])
1523

    
1524

    
1525
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

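  # Illustrative N+1 example (names and numbers made up): if node A reports
  # mfree=2048 MiB and is secondary for instances whose primary is node B
  # needing 1024+2048 MiB of auto-balanced memory, then 3072 > 2048 and an
  # ENODEN1 error is reported for node A.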
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

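  # _VerifyFiles() below builds, per filename, a mapping of checksum -> set of
  # node names reporting that checksum, e.g. (illustrative values only):
  #   fileinfo["/var/lib/ganeti/config.data"] = {"0123abcd...": set(["node1"])}
  # A file present with more than one checksum, or missing from nodes that
  # should have it, is reported as a cluster-level error.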
  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_all_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    node_names = frozenset(node.name for node in nodeinfo if not node.offline)

    assert master_node in node_names
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
           "Found file listed in more than one file list"

    # Define functions determining which nodes to consider for a file
    file2nodefn = dict([(filename, fn)
      for (files, fn) in [(files_all, None),
                          (files_all_opt, None),
                          (files_mc, lambda node: (node.master_candidate or
                                                   node.name == master_node)),
                          (files_vm, lambda node: node.vm_capable)]
      for filename in files])

    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())

    for node in nodeinfo:
      if node.offline:
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        continue

      for (filename, checksum) in node_files.items():
        # Check if the file should be considered for a node
        fn = file2nodefn[filename]
        if fn is None or fn(node):
          fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes)

      # Nodes missing file
      missing_file = node_names - with_file

      if filename in files_all_opt:
        # All or no nodes
        errorif(missing_file and missing_file != node_names,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

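  # Illustrative shape of the structure returned by _CollectDiskInfo() below
  # (instance and node names made up):
  #   instdisk = {
  #     "inst1": {"node1": [(True, status), (False, "node offline")]},
  #     "inst2": {"node2": [(True, status)]},
  #   }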
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; their failure makes
    their output be logged in the verify output and the verification fail.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

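  # Exec() below proceeds in phases: gather per-node RPC data, verify
  # configuration file consistency, verify node status, verify instance
  # status, look for orphan volumes and finally (unless skipped) check N+1
  # memory redundancy.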
  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable-msg=R0914

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    # We will make nodes contact all nodes in their group, and one node from
    # every other group.
    # TODO: should it be a *random* node, different every time?
    online_nodes = [node.name for node in node_data_list if not node.offline]
    other_group_nodes = {}

    for name in sorted(self.all_node_info):
      node = self.all_node_info[name]
      if (node.group not in other_group_nodes
          and node.group != self.group_uuid
          and not node.offline):
        other_group_nodes[node.group] = node.name

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_up and pnode_img.offline,
               self.EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2829
    """Analyze the post-hooks' result
2830

2831
    This method analyses the hook result, handles it, and sends some
2832
    nicely-formatted feedback back to the user.
2833

2834
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2835
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2836
    @param hooks_results: the results of the multi-node hooks rpc call
2837
    @param feedback_fn: function used to send feedback back to the caller
2838
    @param lu_result: previous Exec result
2839
    @return: the new Exec result, based on the previous result
2840
        and hook results
2841

2842
    """
2843
    # We only really run POST phase hooks, only for non-empty groups,
2844
    # and are only interested in their results
2845
    if not self.my_node_names:
2846
      # empty node group
2847
      pass
2848
    elif phase == constants.HOOKS_PHASE_POST:
2849
      # Used to change hooks' output to proper indentation
2850
      feedback_fn("* Hooks Results")
2851
      assert hooks_results, "invalid result from hooks"
2852

    
2853
      for node_name in hooks_results:
2854
        res = hooks_results[node_name]
2855
        msg = res.fail_msg
2856
        test = msg and not res.offline
2857
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2858
                      "Communication failure in hooks execution: %s", msg)
2859
        if res.offline or msg:
2860
          # No need to investigate payload if node is offline or gave an error.
2861
          # manually override lu_result here, as _ErrorIf only
2862
          # overrides self.bad
2863
          lu_result = 1
2864
          continue
2865
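        # each payload entry is a (script, status, output) tuple as
        # produced by the hook runner on that node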
        for script, hkr, output in res.payload:
2866
          test = hkr == constants.HKR_FAIL
2867
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2868
                        "Script %s failed, output:", script)
2869
          if test:
2870
            output = self._HOOKS_INDENT_RE.sub("      ", output)
2871
            feedback_fn("%s" % output)
2872
            lu_result = 0
2873

    
2874
    return lu_result
2875

    
2876

    
2877
class LUClusterVerifyDisks(NoHooksLU):
2878
  """Verifies the cluster disks status.
2879

2880
  """
2881
  REQ_BGL = False
2882

    
2883
  def ExpandNames(self):
2884
    self.share_locks = _ShareAll()
2885
    self.needed_locks = {
2886
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
2887
      }
2888

    
2889
  def Exec(self, feedback_fn):
2890
    group_names = self.glm.list_owned(locking.LEVEL_NODEGROUP)
2891

    
2892
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
2893
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
2894
                           for group in group_names])
2895

    
2896

    
2897
class LUGroupVerifyDisks(NoHooksLU):
2898
  """Verifies the status of all disks in a node group.
2899

2900
  """
2901
  REQ_BGL = False
2902

    
2903
  def ExpandNames(self):
2904
    # Raises errors.OpPrereqError on its own if group can't be found
2905
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2906

    
2907
    self.share_locks = _ShareAll()
2908
    self.needed_locks = {
2909
      locking.LEVEL_INSTANCE: [],
2910
      locking.LEVEL_NODEGROUP: [],
2911
      locking.LEVEL_NODE: [],
2912
      }
2913

    
2914
  def DeclareLocks(self, level):
2915
    if level == locking.LEVEL_INSTANCE:
2916
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
2917

    
2918
      # Lock instances optimistically, needs verification once node and group
2919
      # locks have been acquired
2920
      self.needed_locks[locking.LEVEL_INSTANCE] = \
2921
        self.cfg.GetNodeGroupInstances(self.group_uuid)
2922

    
2923
    elif level == locking.LEVEL_NODEGROUP:
2924
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
2925

    
2926
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
2927
        set([self.group_uuid] +
2928
            # Lock all groups used by instances optimistically; this requires
2929
            # going via the node before it's locked, requiring verification
2930
            # later on
2931
            [group_uuid
2932
             for instance_name in
2933
               self.glm.list_owned(locking.LEVEL_INSTANCE)
2934
             for group_uuid in
2935
               self.cfg.GetInstanceNodeGroups(instance_name)])
2936

    
2937
    elif level == locking.LEVEL_NODE:
2938
      # This will only lock the nodes in the group to be verified which contain
2939
      # actual instances
2940
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
2941
      self._LockInstancesNodes()
2942

    
2943
      # Lock all nodes in group to be verified
2944
      assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
2945
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
2946
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
2947

    
2948
  def CheckPrereq(self):
2949
    owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
2950
    owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
2951
    owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))
2952

    
2953
    assert self.group_uuid in owned_groups
2954

    
2955
    # Check if locked instances are still correct
2956
    wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2957
    if owned_instances != wanted_instances:
2958
      raise errors.OpPrereqError("Instances in node group %s changed since"
2959
                                 " locks were acquired, wanted %s, have %s;"
2960
                                 " retry the operation" %
2961
                                 (self.op.group_name,
2962
                                  utils.CommaJoin(wanted_instances),
2963
                                  utils.CommaJoin(owned_instances)),
2964
                                 errors.ECODE_STATE)
2965

    
2966
    # Get instance information
2967
    self.instances = dict((name, self.cfg.GetInstanceInfo(name))
2968
                          for name in owned_instances)
2969

    
2970
    # Check if node groups for locked instances are still correct
2971
    for (instance_name, inst) in self.instances.items():
2972
      assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
2973
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
2974
      assert owned_nodes.issuperset(inst.all_nodes), \
2975
        "Instance %s's nodes changed while we kept the lock" % instance_name
2976

    
2977
      inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
2978
      if not owned_groups.issuperset(inst_groups):
2979
        raise errors.OpPrereqError("Instance %s's node groups changed since"
2980
                                   " locks were acquired, current groups are"
2981
                                   " are '%s', owning groups '%s'; retry the"
2982
                                   " operation" %
2983
                                   (instance_name,
2984
                                    utils.CommaJoin(inst_groups),
2985
                                    utils.CommaJoin(owned_groups)),
2986
                                   errors.ECODE_STATE)
2987

    
2988
  def Exec(self, feedback_fn):
2989
    """Verify integrity of cluster disks.
2990

2991
    @rtype: tuple of three items
2992
    @return: a tuple of (dict of node-to-node_error, list of instances
2993
        which need activate-disks, dict of instance: (node, volume) for
2994
        missing volumes
2995

2996
    """
2997
    res_nodes = {}
2998
    res_instances = set()
2999
    res_missing = {}
3000

    
3001
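    # map every (node, lv_name) pair to the instance owning that LV,
    # considering only instances that are administratively up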
    nv_dict = _MapInstanceDisksToNodes([inst
3002
                                        for inst in self.instances.values()
3003
                                        if inst.admin_up])
3004

    
3005
    if nv_dict:
3006
      nodes = utils.NiceSort(set(self.glm.list_owned(locking.LEVEL_NODE)) &
3007
                             set(self.cfg.GetVmCapableNodeList()))
3008

    
3009
      node_lvs = self.rpc.call_lv_list(nodes, [])
3010

    
3011
      for (node, node_res) in node_lvs.items():
3012
        if node_res.offline:
3013
          continue
3014

    
3015
        msg = node_res.fail_msg
3016
        if msg:
3017
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3018
          res_nodes[node] = msg
3019
          continue
3020

    
3021
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3022
          inst = nv_dict.pop((node, lv_name), None)
3023
          if not (lv_online or inst is None):
3024
            res_instances.add(inst)
3025

    
3026
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3027
      # better
3028
      for key, inst in nv_dict.iteritems():
3029
        res_missing.setdefault(inst, []).append(key)
3030

    
3031
    return (res_nodes, list(res_instances), res_missing)
3032

    
3033

    
3034
class LUClusterRepairDiskSizes(NoHooksLU):
3035
  """Verifies the cluster disks sizes.
3036

3037
  """
3038
  REQ_BGL = False
3039

    
3040
  def ExpandNames(self):
3041
    if self.op.instances:
3042
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3043
      self.needed_locks = {
3044
        locking.LEVEL_NODE: [],
3045
        locking.LEVEL_INSTANCE: self.wanted_names,
3046
        }
3047
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3048
    else:
3049
      self.wanted_names = None
3050
      self.needed_locks = {
3051
        locking.LEVEL_NODE: locking.ALL_SET,
3052
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3053
        }
3054
    self.share_locks = _ShareAll()
3055

    
3056
  def DeclareLocks(self, level):
3057
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3058
      self._LockInstancesNodes(primary_only=True)
3059

    
3060
  def CheckPrereq(self):
3061
    """Check prerequisites.
3062

3063
    This only checks the optional instance list against the existing names.
3064

3065
    """
3066
    if self.wanted_names is None:
3067
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
3068

    
3069
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
3070
                             in self.wanted_names]
3071

    
3072
  def _EnsureChildSizes(self, disk):
3073
    """Ensure children of the disk have the needed disk size.
3074

3075
    This is valid mainly for DRBD8 and fixes an issue where the
3076
    children have a smaller disk size.
3077

3078
    @param disk: an L{ganeti.objects.Disk} object
3079

3080
    """
3081
    if disk.dev_type == constants.LD_DRBD8:
3082
      assert disk.children, "Empty children for DRBD8?"
3083
      fchild = disk.children[0]
3084
      mismatch = fchild.size < disk.size
3085
      if mismatch:
3086
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3087
                     fchild.size, disk.size)
3088
        fchild.size = disk.size
3089

    
3090
      # and we recurse on this child only, not on the metadev
3091
      return self._EnsureChildSizes(fchild) or mismatch
3092
    else:
3093
      return False
3094

    
3095
  def Exec(self, feedback_fn):
3096
    """Verify the size of cluster disks.
3097

3098
    """
3099
    # TODO: check child disks too
3100
    # TODO: check differences in size between primary/secondary nodes
3101
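    # maps primary node name -> list of (instance, disk_index, disk) tuples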
    per_node_disks = {}
3102
    for instance in self.wanted_instances:
3103
      pnode = instance.primary_node
3104
      if pnode not in per_node_disks:
3105
        per_node_disks[pnode] = []
3106
      for idx, disk in enumerate(instance.disks):
3107
        per_node_disks[pnode].append((instance, idx, disk))
3108

    
3109
    changed = []
3110
    for node, dskl in per_node_disks.items():
3111
      newl = [v[2].Copy() for v in dskl]
3112
      for dsk in newl:
3113
        self.cfg.SetDiskID(dsk, node)
3114
      result = self.rpc.call_blockdev_getsize(node, newl)
3115
      if result.fail_msg:
3116
        self.LogWarning("Failure in blockdev_getsize call to node"
3117
                        " %s, ignoring", node)
3118
        continue
3119
      if len(result.payload) != len(dskl):
3120
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3121
                        " result.payload=%s", node, len(dskl), result.payload)
3122
        self.LogWarning("Invalid result from node %s, ignoring node results",
3123
                        node)
3124
        continue
3125
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3126
        if size is None:
3127
          self.LogWarning("Disk %d of instance %s did not return size"
3128
                          " information, ignoring", idx, instance.name)
3129
          continue
3130
        if not isinstance(size, (int, long)):
3131
          self.LogWarning("Disk %d of instance %s did not return valid"
3132
                          " size information, ignoring", idx, instance.name)
3133
          continue
3134
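        # the reported size is in bytes; shift by 20 bits to compare in
        # MiB, the unit recorded in disk.size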
        size = size >> 20
3135
        if size != disk.size:
3136
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3137
                       " correcting: recorded %d, actual %d", idx,
3138
                       instance.name, disk.size, size)
3139
          disk.size = size
3140
          self.cfg.Update(instance, feedback_fn)
3141
          changed.append((instance.name, idx, size))
3142
        if self._EnsureChildSizes(disk):
3143
          self.cfg.Update(instance, feedback_fn)
3144
          changed.append((instance.name, idx, disk.size))
3145
    return changed
3146

    
3147

    
3148
class LUClusterRename(LogicalUnit):
3149
  """Rename the cluster.
3150

3151
  """
3152
  HPATH = "cluster-rename"
3153
  HTYPE = constants.HTYPE_CLUSTER
3154

    
3155
  def BuildHooksEnv(self):
3156
    """Build hooks env.
3157

3158
    """
3159
    return {
3160
      "OP_TARGET": self.cfg.GetClusterName(),
3161
      "NEW_NAME": self.op.name,
3162
      }
3163

    
3164
  def BuildHooksNodes(self):
3165
    """Build hooks nodes.
3166

3167
    """
3168
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3169

    
3170
  def CheckPrereq(self):
3171
    """Verify that the passed name is a valid one.
3172

3173
    """
3174
    hostname = netutils.GetHostname(name=self.op.name,
3175
                                    family=self.cfg.GetPrimaryIPFamily())
3176

    
3177
    new_name = hostname.name
3178
    self.ip = new_ip = hostname.ip
3179
    old_name = self.cfg.GetClusterName()
3180
    old_ip = self.cfg.GetMasterIP()
3181
    if new_name == old_name and new_ip == old_ip:
3182
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3183
                                 " cluster has changed",
3184
                                 errors.ECODE_INVAL)
3185
    if new_ip != old_ip:
3186
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3187
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3188
                                   " reachable on the network" %
3189
                                   new_ip, errors.ECODE_NOTUNIQUE)
3190

    
3191
    self.op.name = new_name
3192

    
3193
  def Exec(self, feedback_fn):
3194
    """Rename the cluster.
3195

3196
    """
3197
    clustername = self.op.name
3198
    ip = self.ip
3199

    
3200
    # shutdown the master IP
3201
    master = self.cfg.GetMasterNode()
3202
    result = self.rpc.call_node_stop_master(master, False)
3203
    result.Raise("Could not disable the master role")
3204

    
3205
    try:
3206
      cluster = self.cfg.GetClusterInfo()
3207
      cluster.cluster_name = clustername
3208
      cluster.master_ip = ip
3209
      self.cfg.Update(cluster, feedback_fn)
3210

    
3211
      # update the known hosts file
3212
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3213
      node_list = self.cfg.GetOnlineNodeList()
3214
      try:
3215
        node_list.remove(master)
3216
      except ValueError:
3217
        pass
3218
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3219
    finally:
3220
      result = self.rpc.call_node_start_master(master, False, False)
3221
      msg = result.fail_msg
3222
      if msg:
3223
        self.LogWarning("Could not re-enable the master role on"
3224
                        " the master, please restart manually: %s", msg)
3225

    
3226
    return clustername
3227

    
3228

    
3229
class LUClusterSetParams(LogicalUnit):
3230
  """Change the parameters of the cluster.
3231

3232
  """
3233
  HPATH = "cluster-modify"
3234
  HTYPE = constants.HTYPE_CLUSTER
3235
  REQ_BGL = False
3236

    
3237
  def CheckArguments(self):
3238
    """Check parameters
3239

3240
    """
3241
    if self.op.uid_pool:
3242
      uidpool.CheckUidPool(self.op.uid_pool)
3243

    
3244
    if self.op.add_uids:
3245
      uidpool.CheckUidPool(self.op.add_uids)
3246

    
3247
    if self.op.remove_uids:
3248
      uidpool.CheckUidPool(self.op.remove_uids)
3249

    
3250
  def ExpandNames(self):
3251
    # FIXME: in the future maybe other cluster params won't require checking on
3252
    # all nodes to be modified.
3253
    self.needed_locks = {
3254
      locking.LEVEL_NODE: locking.ALL_SET,
3255
    }
3256
    self.share_locks[locking.LEVEL_NODE] = 1
3257

    
3258
  def BuildHooksEnv(self):
3259
    """Build hooks env.
3260

3261
    """
3262
    return {
3263
      "OP_TARGET": self.cfg.GetClusterName(),
3264
      "NEW_VG_NAME": self.op.vg_name,
3265
      }
3266

    
3267
  def BuildHooksNodes(self):
3268
    """Build hooks nodes.
3269

3270
    """
3271
    mn = self.cfg.GetMasterNode()
3272
    return ([mn], [mn])
3273

    
3274
  def CheckPrereq(self):
3275
    """Check prerequisites.
3276

3277
    This checks whether the given params don't conflict and
3278
    whether the given volume group is valid.
3279

3280
    """
3281
    if self.op.vg_name is not None and not self.op.vg_name:
3282
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3283
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3284
                                   " instances exist", errors.ECODE_INVAL)
3285

    
3286
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3287
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3288
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3289
                                   " drbd-based instances exist",
3290
                                   errors.ECODE_INVAL)
3291

    
3292
    node_list = self.glm.list_owned(locking.LEVEL_NODE)
3293

    
3294
    # if vg_name is not None, check the given volume group on all nodes
3295
    if self.op.vg_name:
3296
      vglist = self.rpc.call_vg_list(node_list)
3297
      for node in node_list:
3298
        msg = vglist[node].fail_msg
3299
        if msg:
3300
          # ignoring down node
3301
          self.LogWarning("Error while gathering data on node %s"
3302
                          " (ignoring node): %s", node, msg)
3303
          continue
3304
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3305
                                              self.op.vg_name,
3306
                                              constants.MIN_VG_SIZE)
3307
        if vgstatus:
3308
          raise errors.OpPrereqError("Error on node '%s': %s" %
3309
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3310

    
3311
    if self.op.drbd_helper:
3312
      # check the given drbd helper on all nodes
3313
      helpers = self.rpc.call_drbd_helper(node_list)
3314
      for node in node_list:
3315
        ninfo = self.cfg.GetNodeInfo(node)
3316
        if ninfo.offline:
3317
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3318
          continue
3319
        msg = helpers[node].fail_msg
3320
        if msg:
3321
          raise errors.OpPrereqError("Error checking drbd helper on node"
3322
                                     " '%s': %s" % (node, msg),
3323
                                     errors.ECODE_ENVIRON)
3324
        node_helper = helpers[node].payload
3325
        if node_helper != self.op.drbd_helper:
3326
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3327
                                     (node, node_helper), errors.ECODE_ENVIRON)
3328

    
3329
    self.cluster = cluster = self.cfg.GetClusterInfo()
3330
    # validate params changes
3331
    if self.op.beparams:
3332
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3333
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3334

    
3335
    if self.op.ndparams:
3336
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3337
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3338

    
3339
      # TODO: we need a more general way to handle resetting
3340
      # cluster-level parameters to default values
3341
      if self.new_ndparams["oob_program"] == "":
3342
        self.new_ndparams["oob_program"] = \
3343
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3344

    
3345
    if self.op.nicparams:
3346
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3347
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3348
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3349
      nic_errors = []
3350

    
3351
      # check all instances for consistency
3352
      for instance in self.cfg.GetAllInstancesInfo().values():
3353
        for nic_idx, nic in enumerate(instance.nics):
3354
          params_copy = copy.deepcopy(nic.nicparams)
3355
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3356

    
3357
          # check parameter syntax
3358
          try:
3359
            objects.NIC.CheckParameterSyntax(params_filled)
3360
          except errors.ConfigurationError, err:
3361
            nic_errors.append("Instance %s, nic/%d: %s" %
3362
                              (instance.name, nic_idx, err))
3363

    
3364
          # if we're moving instances to routed, check that they have an ip
3365
          target_mode = params_filled[constants.NIC_MODE]
3366
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3367
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3368
                              " address" % (instance.name, nic_idx))
3369
      if nic_errors:
3370
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3371
                                   "\n".join(nic_errors))
3372

    
3373
    # hypervisor list/parameters
3374
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3375
    if self.op.hvparams:
3376
      for hv_name, hv_dict in self.op.hvparams.items():
3377
        if hv_name not in self.new_hvparams:
3378
          self.new_hvparams[hv_name] = hv_dict
3379
        else:
3380
          self.new_hvparams[hv_name].update(hv_dict)
3381

    
3382
    # os hypervisor parameters
3383
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3384
    if self.op.os_hvp:
3385
      for os_name, hvs in self.op.os_hvp.items():
3386
        if os_name not in self.new_os_hvp:
3387
          self.new_os_hvp[os_name] = hvs
3388
        else:
3389
          for hv_name, hv_dict in hvs.items():
3390
            if hv_name not in self.new_os_hvp[os_name]:
3391
              self.new_os_hvp[os_name][hv_name] = hv_dict
3392
            else:
3393
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3394

    
3395
    # os parameters
3396
    self.new_osp = objects.FillDict(cluster.osparams, {})
3397
    if self.op.osparams:
3398
      for os_name, osp in self.op.osparams.items():
3399
        if os_name not in self.new_osp:
3400
          self.new_osp[os_name] = {}
3401

    
3402
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3403
                                                  use_none=True)
3404

    
3405
        if not self.new_osp[os_name]:
3406
          # we removed all parameters
3407
          del self.new_osp[os_name]
3408
        else:
3409
          # check the parameter validity (remote check)
3410
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3411
                         os_name, self.new_osp[os_name])
3412

    
3413
    # changes to the hypervisor list
3414
    if self.op.enabled_hypervisors is not None:
3415
      self.hv_list = self.op.enabled_hypervisors
3416
      for hv in self.hv_list:
3417
        # if the hypervisor doesn't already exist in the cluster
3418
        # hvparams, we initialize it to empty, and then (in both
3419
        # cases) we make sure to fill the defaults, as we might not
3420
        # have a complete defaults list if the hypervisor wasn't
3421
        # enabled before
3422
        if hv not in new_hvp:
3423
          new_hvp[hv] = {}
3424
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3425
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3426
    else:
3427
      self.hv_list = cluster.enabled_hypervisors
3428

    
3429
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3430
      # either the enabled list has changed, or the parameters have, validate
3431
      for hv_name, hv_params in self.new_hvparams.items():
3432
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3433
            (self.op.enabled_hypervisors and
3434
             hv_name in self.op.enabled_hypervisors)):
3435
          # either this is a new hypervisor, or its parameters have changed
3436
          hv_class = hypervisor.GetHypervisor(hv_name)
3437
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3438
          hv_class.CheckParameterSyntax(hv_params)
3439
          _CheckHVParams(self, node_list, hv_name, hv_params)
3440

    
3441
    if self.op.os_hvp:
3442
      # no need to check any newly-enabled hypervisors, since the
3443
      # defaults have already been checked in the above code-block
3444
      for os_name, os_hvp in self.new_os_hvp.items():
3445
        for hv_name, hv_params in os_hvp.items():
3446
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3447
          # we need to fill in the new os_hvp on top of the actual hv_p
3448
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3449
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3450
          hv_class = hypervisor.GetHypervisor(hv_name)
3451
          hv_class.CheckParameterSyntax(new_osp)
3452
          _CheckHVParams(self, node_list, hv_name, new_osp)
3453

    
3454
    if self.op.default_iallocator:
3455
      alloc_script = utils.FindFile(self.op.default_iallocator,
3456
                                    constants.IALLOCATOR_SEARCH_PATH,
3457
                                    os.path.isfile)
3458
      if alloc_script is None:
3459
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3460
                                   " specified" % self.op.default_iallocator,
3461
                                   errors.ECODE_INVAL)
3462

    
3463
  def Exec(self, feedback_fn):
3464
    """Change the parameters of the cluster.
3465

3466
    """
3467
    if self.op.vg_name is not None:
3468
      new_volume = self.op.vg_name
3469
      if not new_volume:
3470
        new_volume = None
3471
      if new_volume != self.cfg.GetVGName():
3472
        self.cfg.SetVGName(new_volume)
3473
      else:
3474
        feedback_fn("Cluster LVM configuration already in desired"
3475
                    " state, not changing")
3476
    if self.op.drbd_helper is not None:
3477
      new_helper = self.op.drbd_helper
3478
      if not new_helper:
3479
        new_helper = None
3480
      if new_helper != self.cfg.GetDRBDHelper():
3481
        self.cfg.SetDRBDHelper(new_helper)
3482
      else:
3483
        feedback_fn("Cluster DRBD helper already in desired state,"
3484
                    " not changing")
3485
    if self.op.hvparams:
3486
      self.cluster.hvparams = self.new_hvparams
3487
    if self.op.os_hvp:
3488
      self.cluster.os_hvp = self.new_os_hvp
3489
    if self.op.enabled_hypervisors is not None:
3490
      self.cluster.hvparams = self.new_hvparams
3491
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3492
    if self.op.beparams:
3493
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3494
    if self.op.nicparams:
3495
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3496
    if self.op.osparams:
3497
      self.cluster.osparams = self.new_osp
3498
    if self.op.ndparams:
3499
      self.cluster.ndparams = self.new_ndparams
3500

    
3501
    if self.op.candidate_pool_size is not None:
3502
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3503
      # we need to update the pool size here, otherwise the save will fail
3504
      _AdjustCandidatePool(self, [])
3505

    
3506
    if self.op.maintain_node_health is not None:
3507
      self.cluster.maintain_node_health = self.op.maintain_node_health
3508

    
3509
    if self.op.prealloc_wipe_disks is not None:
3510
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3511

    
3512
    if self.op.add_uids is not None:
3513
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3514

    
3515
    if self.op.remove_uids is not None:
3516
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3517

    
3518
    if self.op.uid_pool is not None:
3519
      self.cluster.uid_pool = self.op.uid_pool
3520

    
3521
    if self.op.default_iallocator is not None:
3522
      self.cluster.default_iallocator = self.op.default_iallocator
3523

    
3524
    if self.op.reserved_lvs is not None:
3525
      self.cluster.reserved_lvs = self.op.reserved_lvs
3526

    
3527
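    # each entry in "mods" is an (action, OS name) pair, e.g.
    # (constants.DDM_ADD, "my-custom-os") -- the OS name is illustrative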
    def helper_os(aname, mods, desc):
3528
      desc += " OS list"
3529
      lst = getattr(self.cluster, aname)
3530
      for key, val in mods:
3531
        if key == constants.DDM_ADD:
3532
          if val in lst:
3533
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3534
          else:
3535
            lst.append(val)
3536
        elif key == constants.DDM_REMOVE:
3537
          if val in lst:
3538
            lst.remove(val)
3539
          else:
3540
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3541
        else:
3542
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3543

    
3544
    if self.op.hidden_os:
3545
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3546

    
3547
    if self.op.blacklisted_os:
3548
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3549

    
3550
    if self.op.master_netdev:
3551
      master = self.cfg.GetMasterNode()
3552
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3553
                  self.cluster.master_netdev)
3554
      result = self.rpc.call_node_stop_master(master, False)
3555
      result.Raise("Could not disable the master ip")
3556
      feedback_fn("Changing master_netdev from %s to %s" %
3557
                  (self.cluster.master_netdev, self.op.master_netdev))
3558
      self.cluster.master_netdev = self.op.master_netdev
3559

    
3560
    self.cfg.Update(self.cluster, feedback_fn)
3561

    
3562
    if self.op.master_netdev:
3563
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3564
                  self.op.master_netdev)
3565
      result = self.rpc.call_node_start_master(master, False, False)
3566
      if result.fail_msg:
3567
        self.LogWarning("Could not re-enable the master ip on"
3568
                        " the master, please restart manually: %s",
3569
                        result.fail_msg)
3570

    
3571

    
3572
def _UploadHelper(lu, nodes, fname):
3573
  """Helper for uploading a file and showing warnings.
3574

3575
  """
3576
  if os.path.exists(fname):
3577
    result = lu.rpc.call_upload_file(nodes, fname)
3578
    for to_node, to_result in result.items():
3579
      msg = to_result.fail_msg
3580
      if msg:
3581
        msg = ("Copy of file %s to node %s failed: %s" %
3582
               (fname, to_node, msg))
3583
        lu.proc.LogWarning(msg)
3584

    
3585

    
3586
def _ComputeAncillaryFiles(cluster, redist):
3587
  """Compute files external to Ganeti which need to be consistent.
3588

3589
  @type redist: boolean
3590
  @param redist: Whether to include files which need to be redistributed
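  @rtype: tuple
  @return: (files_all, files_all_opt, files_mc, files_vm) sets of filenames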
3591

3592
  """
3593
  # Compute files for all nodes
3594
  files_all = set([
3595
    constants.SSH_KNOWN_HOSTS_FILE,
3596
    constants.CONFD_HMAC_KEY,
3597
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3598
    ])
3599

    
3600
  if not redist:
3601
    files_all.update(constants.ALL_CERT_FILES)
3602
    files_all.update(ssconf.SimpleStore().GetFileList())
3603

    
3604
  if cluster.modify_etc_hosts:
3605
    files_all.add(constants.ETC_HOSTS)
3606

    
3607
  # Files which must either exist on all nodes or on none
3608
  files_all_opt = set([
3609
    constants.RAPI_USERS_FILE,
3610
    ])
3611

    
3612
  # Files which should only be on master candidates
3613
  files_mc = set()
3614
  if not redist:
3615
    files_mc.add(constants.CLUSTER_CONF_FILE)
3616

    
3617
  # Files which should only be on VM-capable nodes
3618
  files_vm = set(filename
3619
    for hv_name in cluster.enabled_hypervisors
3620
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3621

    
3622
  # Filenames must be unique
3623
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3624
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3625
         "Found file listed in more than one file list"
3626

    
3627
  return (files_all, files_all_opt, files_mc, files_vm)
3628

    
3629

    
3630
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3631
  """Distribute additional files which are part of the cluster configuration.
3632

3633
  ConfigWriter takes care of distributing the config and ssconf files, but
3634
  there are more files which should be distributed to all nodes. This function
3635
  makes sure those are copied.
3636

3637
  @param lu: calling logical unit
3638
  @param additional_nodes: list of nodes not in the config to distribute to
3639
  @type additional_vm: boolean
3640
  @param additional_vm: whether the additional nodes are vm-capable or not
3641

3642
  """
3643
  # Gather target nodes
3644
  cluster = lu.cfg.GetClusterInfo()
3645
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3646

    
3647
  online_nodes = lu.cfg.GetOnlineNodeList()
3648
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3649

    
3650
  if additional_nodes is not None:
3651
    online_nodes.extend(additional_nodes)
3652
    if additional_vm:
3653
      vm_nodes.extend(additional_nodes)
3654

    
3655
  # Never distribute to master node
3656
  for nodelist in [online_nodes, vm_nodes]:
3657
    if master_info.name in nodelist:
3658
      nodelist.remove(master_info.name)
3659

    
3660
  # Gather file lists
3661
  (files_all, files_all_opt, files_mc, files_vm) = \
3662
    _ComputeAncillaryFiles(cluster, True)
3663

    
3664
  # Never re-distribute configuration file from here
3665
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3666
              constants.CLUSTER_CONF_FILE in files_vm)
3667
  assert not files_mc, "Master candidates not handled in this function"
3668

    
3669
  filemap = [
3670
    (online_nodes, files_all),
3671
    (online_nodes, files_all_opt),
3672
    (vm_nodes, files_vm),
3673
    ]
3674

    
3675
  # Upload the files
3676
  for (node_list, files) in filemap:
3677
    for fname in files:
3678
      _UploadHelper(lu, node_list, fname)
3679

    
3680

    
3681
class LUClusterRedistConf(NoHooksLU):
3682
  """Force the redistribution of cluster configuration.
3683

3684
  This is a very simple LU.
3685

3686
  """
3687
  REQ_BGL = False
3688

    
3689
  def ExpandNames(self):
3690
    self.needed_locks = {
3691
      locking.LEVEL_NODE: locking.ALL_SET,
3692
    }
3693
    self.share_locks[locking.LEVEL_NODE] = 1
3694

    
3695
  def Exec(self, feedback_fn):
3696
    """Redistribute the configuration.
3697

3698
    """
3699
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3700
    _RedistributeAncillaryFiles(self)
3701

    
3702

    
3703
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3704
  """Sleep and poll for an instance's disk to sync.
3705

3706
  """
3707
  if not instance.disks or (disks is not None and not disks):
3708
    return True
3709

    
3710
  disks = _ExpandCheckDisks(instance, disks)
3711

    
3712
  if not oneshot:
3713
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3714

    
3715
  node = instance.primary_node
3716

    
3717
  for dev in disks:
3718
    lu.cfg.SetDiskID(dev, node)
3719

    
3720
  # TODO: Convert to utils.Retry
3721

    
3722
  retries = 0
3723
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3724
  while True:
3725
    max_time = 0
3726
    done = True
3727
    cumul_degraded = False
3728
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3729
    msg = rstats.fail_msg
3730
    if msg:
3731
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3732
      retries += 1
3733
      if retries >= 10:
3734
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3735
                                 " aborting." % node)
3736
      time.sleep(6)
3737
      continue
3738
    rstats = rstats.payload
3739
    retries = 0
3740
    for i, mstat in enumerate(rstats):
3741
      if mstat is None:
3742
        lu.LogWarning("Can't compute data for node %s/%s",
3743
                      node, disks[i].iv_name)
3744
        continue
3745

    
3746
      cumul_degraded = (cumul_degraded or
3747
                        (mstat.is_degraded and mstat.sync_percent is None))
3748
      if mstat.sync_percent is not None:
3749
        done = False
3750
        if mstat.estimated_time is not None:
3751
          rem_time = ("%s remaining (estimated)" %
3752
                      utils.FormatSeconds(mstat.estimated_time))
3753
          max_time = mstat.estimated_time
3754
        else:
3755
          rem_time = "no time estimate"
3756
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3757
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3758

    
3759
    # if we're done but degraded, let's do a few small retries, to
3760
    # make sure we see a stable and not transient situation; therefore
3761
    # we force restart of the loop
3762
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3763
      logging.info("Degraded disks found, %d retries left", degr_retries)
3764
      degr_retries -= 1
3765
      time.sleep(1)
3766
      continue
3767

    
3768
    if done or oneshot:
3769
      break
3770

    
3771
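    # sleep for roughly the estimated remaining sync time, but never
    # more than a minute between two status checks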
    time.sleep(min(60, max_time))
3772

    
3773
  if done:
3774
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3775
  return not cumul_degraded
3776

    
3777

    
3778
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3779
  """Check that mirrors are not degraded.
3780

3781
  The ldisk parameter, if True, will change the test from the
3782
  is_degraded attribute (which represents overall non-ok status for
3783
  the device(s)) to the ldisk (representing the local storage status).
3784

3785
  """
3786
  lu.cfg.SetDiskID(dev, node)
3787

    
3788
  result = True
3789

    
3790
  if on_primary or dev.AssembleOnSecondary():
3791
    rstats = lu.rpc.call_blockdev_find(node, dev)
3792
    msg = rstats.fail_msg
3793
    if msg:
3794
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3795
      result = False
3796
    elif not rstats.payload:
3797
      lu.LogWarning("Can't find disk on node %s", node)
3798
      result = False
3799
    else:
3800
      if ldisk:
3801
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3802
      else:
3803
        result = result and not rstats.payload.is_degraded
3804

    
3805
  if dev.children:
3806
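    # recurse into child devices; note that children are always checked
    # with the default is_degraded test, the ldisk flag is not propagated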
    for child in dev.children:
3807
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3808

    
3809
  return result
3810

    
3811

    
3812
class LUOobCommand(NoHooksLU):
3813
  """Logical unit for OOB handling.
3814

3815
  """
3816
  REQ_BGL = False
3817
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3818

    
3819
  def ExpandNames(self):
3820
    """Gather locks we need.
3821

3822
    """
3823
    if self.op.node_names:
3824
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3825
      lock_names = self.op.node_names
3826
    else:
3827
      lock_names = locking.ALL_SET
3828

    
3829
    self.needed_locks = {
3830
      locking.LEVEL_NODE: lock_names,
3831
      }
3832

    
3833
  def CheckPrereq(self):
3834
    """Check prerequisites.
3835

3836
    This checks:
3837
     - the node exists in the configuration
3838
     - OOB is supported
3839

3840
    Any errors are signaled by raising errors.OpPrereqError.
3841

3842
    """
3843
    self.nodes = []
3844
    self.master_node = self.cfg.GetMasterNode()
3845

    
3846
    assert self.op.power_delay >= 0.0
3847

    
3848
    if self.op.node_names:
3849
      if (self.op.command in self._SKIP_MASTER and
3850
          self.master_node in self.op.node_names):
3851
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3852
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3853

    
3854
        if master_oob_handler:
3855
          additional_text = ("run '%s %s %s' if you want to operate on the"
3856
                             " master regardless") % (master_oob_handler,
3857
                                                      self.op.command,
3858
                                                      self.master_node)
3859
        else:
3860
          additional_text = "it does not support out-of-band operations"
3861

    
3862
        raise errors.OpPrereqError(("Operating on the master node %s is not"
3863
                                    " allowed for %s; %s") %
3864
                                   (self.master_node, self.op.command,
3865
                                    additional_text), errors.ECODE_INVAL)
3866
    else:
3867
      self.op.node_names = self.cfg.GetNodeList()
3868
      if self.op.command in self._SKIP_MASTER:
3869
        self.op.node_names.remove(self.master_node)
3870

    
3871
    if self.op.command in self._SKIP_MASTER:
3872
      assert self.master_node not in self.op.node_names
3873

    
3874
    for node_name in self.op.node_names:
3875
      node = self.cfg.GetNodeInfo(node_name)
3876

    
3877
      if node is None:
3878
        raise errors.OpPrereqError("Node %s not found" % node_name,
3879
                                   errors.ECODE_NOENT)
3880
      else:
3881
        self.nodes.append(node)
3882

    
3883
      if (not self.op.ignore_status and
3884
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3885
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3886
                                    " not marked offline") % node_name,
3887
                                   errors.ECODE_STATE)
3888

    
3889
  def Exec(self, feedback_fn):
3890
    """Execute OOB and return result if we expect any.
3891

3892
    """
3893
    master_node = self.master_node
3894
    ret = []
3895

    
3896
    for idx, node in enumerate(utils.NiceSort(self.nodes,
3897
                                              key=lambda node: node.name)):
3898
      node_entry = [(constants.RS_NORMAL, node.name)]
3899
      ret.append(node_entry)
3900

    
3901
      oob_program = _SupportsOob(self.cfg, node)
3902

    
3903
      if not oob_program:
3904
        node_entry.append((constants.RS_UNAVAIL, None))
3905
        continue
3906

    
3907
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3908
                   self.op.command, oob_program, node.name)
3909
      result = self.rpc.call_run_oob(master_node, oob_program,
3910
                                     self.op.command, node.name,
3911
                                     self.op.timeout)
3912

    
3913
      if result.fail_msg:
3914
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3915
                        node.name, result.fail_msg)
3916
        node_entry.append((constants.RS_NODATA, None))
3917
      else:
3918
        try:
3919
          self._CheckPayload(result)
3920
        except errors.OpExecError, err:
3921
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
3922
                          node.name, err)
3923
          node_entry.append((constants.RS_NODATA, None))
3924
        else:
3925
          if self.op.command == constants.OOB_HEALTH:
3926
            # For health we should log important events
3927
            for item, status in result.payload:
3928
              if status in [constants.OOB_STATUS_WARNING,
3929
                            constants.OOB_STATUS_CRITICAL]:
3930
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
3931
                                item, node.name, status)
3932

    
3933
          if self.op.command == constants.OOB_POWER_ON:
3934
            node.powered = True
3935
          elif self.op.command == constants.OOB_POWER_OFF:
3936
            node.powered = False
3937
          elif self.op.command == constants.OOB_POWER_STATUS:
3938
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3939
            if powered != node.powered:
3940
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3941
                               " match actual power state (%s)"), node.powered,
3942
                              node.name, powered)
3943

    
3944
          # For configuration changing commands we should update the node
3945
          if self.op.command in (constants.OOB_POWER_ON,
3946
                                 constants.OOB_POWER_OFF):
3947
            self.cfg.Update(node, feedback_fn)
3948

    
3949
          node_entry.append((constants.RS_NORMAL, result.payload))
3950

    
3951
          if (self.op.command == constants.OOB_POWER_ON and
3952
              idx < len(self.nodes) - 1):
3953
            time.sleep(self.op.power_delay)
3954

    
3955
    return ret
3956

    
3957
  def _CheckPayload(self, result):
3958
    """Checks if the payload is valid.
3959

3960
    @param result: RPC result
3961
    @raises errors.OpExecError: If payload is not valid
3962

3963
    """
3964
    errs = []
3965
    if self.op.command == constants.OOB_HEALTH:
3966
      if not isinstance(result.payload, list):
3967
        errs.append("command 'health' is expected to return a list but got %s" %
3968
                    type(result.payload))
3969
      else:
3970
        for item, status in result.payload:
3971
          if status not in constants.OOB_STATUSES:
3972
            errs.append("health item '%s' has invalid status '%s'" %
3973
                        (item, status))
3974

    
3975
    if self.op.command == constants.OOB_POWER_STATUS:
3976
      if not isinstance(result.payload, dict):
3977
        errs.append("power-status is expected to return a dict but got %s" %
3978
                    type(result.payload))
3979

    
3980
    if self.op.command in [
3981
        constants.OOB_POWER_ON,
3982
        constants.OOB_POWER_OFF,
3983
        constants.OOB_POWER_CYCLE,
3984
        ]:
3985
      if result.payload is not None:
3986
        errs.append("%s is expected to not return payload but got '%s'" %
3987
                    (self.op.command, result.payload))
3988

    
3989
    if errs:
3990
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3991
                               utils.CommaJoin(errs))
3992

    
3993
class _OsQuery(_QueryBase):
3994
  FIELDS = query.OS_FIELDS
3995

    
3996
  def ExpandNames(self, lu):
3997
    # Lock all nodes in shared mode
3998
    # Temporary removal of locks, should be reverted later
3999
    # TODO: reintroduce locks when they are lighter-weight
4000
    lu.needed_locks = {}
4001
    #self.share_locks[locking.LEVEL_NODE] = 1
4002
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4003

    
4004
    # The following variables interact with _QueryBase._GetNames
4005
    if self.names:
4006
      self.wanted = self.names
4007
    else:
4008
      self.wanted = locking.ALL_SET
4009

    
4010
    self.do_locking = self.use_locking
4011

    
4012
  def DeclareLocks(self, lu, level):
4013
    pass
4014

    
4015
  @staticmethod
4016
  def _DiagnoseByOS(rlist):
4017
    """Remaps a per-node return list into an a per-os per-node dictionary
4018

4019
    @param rlist: a map with node names as keys and OS objects as values
4020

4021
    @rtype: dict
4022
    @return: a dictionary with osnames as keys and as value another
4023
        map, with nodes as keys and tuples of (path, status, diagnose,
4024
        variants, parameters, api_versions) as values, eg::
4025

4026
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4027
                                     (/srv/..., False, "invalid api")],
4028
                           "node2": [(/srv/..., True, "", [], [])]}
4029
          }
4030

4031
    """
4032
    all_os = {}
4033
    # we build here the list of nodes that didn't fail the RPC (at RPC
4034
    # level), so that nodes with a non-responding node daemon don't
4035
    # make all OSes invalid
4036
    good_nodes = [node_name for node_name in rlist
4037
                  if not rlist[node_name].fail_msg]
4038
    for node_name, nr in rlist.items():
4039
      if nr.fail_msg or not nr.payload:
4040
        continue
4041
      for (name, path, status, diagnose, variants,
4042
           params, api_versions) in nr.payload:
4043
        if name not in all_os:
4044
          # build a list of nodes for this os containing empty lists
4045
          # for each node in node_list
4046
          all_os[name] = {}
4047
          for nname in good_nodes:
4048
            all_os[name][nname] = []
4049
        # convert params from [name, help] to (name, help)
4050
        params = [tuple(v) for v in params]
4051
        all_os[name][node_name].append((path, status, diagnose,
4052
                                        variants, params, api_versions))
4053
    return all_os
4054

    
4055
  def _GetQueryData(self, lu):
4056
    """Computes the list of nodes and their attributes.
4057

4058
    """
4059
    # Locking is not used
4060
    assert not (compat.any(lu.glm.is_owned(level)
4061
                           for level in locking.LEVELS
4062
                           if level != locking.LEVEL_CLUSTER) or
4063
                self.do_locking or self.use_locking)
4064

    
4065
    valid_nodes = [node.name
4066
                   for node in lu.cfg.GetAllNodesInfo().values()
4067
                   if not node.offline and node.vm_capable]
4068
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4069
    cluster = lu.cfg.GetClusterInfo()
4070

    
4071
    data = {}
4072

    
4073
    for (os_name, os_data) in pol.items():
4074
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4075
                          hidden=(os_name in cluster.hidden_os),
4076
                          blacklisted=(os_name in cluster.blacklisted_os))
4077

    
4078
      variants = set()
4079
      parameters = set()
4080
      api_versions = set()
4081

    
4082
      for idx, osl in enumerate(os_data.values()):
4083
        info.valid = bool(info.valid and osl and osl[0][1])
4084
        if not info.valid:
4085
          break
4086

    
4087
        (node_variants, node_params, node_api) = osl[0][3:6]
4088
        if idx == 0:
4089
          # First entry
4090
          variants.update(node_variants)
4091
          parameters.update(node_params)
4092
          api_versions.update(node_api)
4093
        else:
4094
          # Filter out inconsistent values
4095
          variants.intersection_update(node_variants)
4096
          parameters.intersection_update(node_params)
4097
          api_versions.intersection_update(node_api)
4098

    
4099
      info.variants = list(variants)
4100
      info.parameters = list(parameters)
4101
      info.api_versions = list(api_versions)
4102

    
4103
      data[os_name] = info
4104

    
4105
    # Prepare data in requested order
4106
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4107
            if name in data]
4108

    
4109

    
4110
class LUOsDiagnose(NoHooksLU):
4111
  """Logical unit for OS diagnose/query.
4112

4113
  """
4114
  REQ_BGL = False
4115

    
4116
  @staticmethod
4117
  def _BuildFilter(fields, names):
4118
    """Builds a filter for querying OSes.
4119

4120
    """
4121
    name_filter = qlang.MakeSimpleFilter("name", names)
4122

    
4123
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4124
    # respective field is not requested
4125
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4126
                     for fname in ["hidden", "blacklisted"]
4127
                     if fname not in fields]
4128
    if "valid" not in fields:
4129
      status_filter.append([qlang.OP_TRUE, "valid"])
4130

    
4131
    if status_filter:
4132
      status_filter.insert(0, qlang.OP_AND)
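      # e.g. when no status field was requested the result is (illustrative):
      #   [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
      #    [OP_NOT, [OP_TRUE, "blacklisted"]], [OP_TRUE, "valid"]]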
4133
    else:
4134
      status_filter = None
4135

    
4136
    if name_filter and status_filter:
4137
      return [qlang.OP_AND, name_filter, status_filter]
4138
    elif name_filter:
4139
      return name_filter
4140
    else:
4141
      return status_filter
4142

    
4143
  def CheckArguments(self):
4144
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4145
                       self.op.output_fields, False)
4146

    
4147
  def ExpandNames(self):
4148
    self.oq.ExpandNames(self)
4149

    
4150
  def Exec(self, feedback_fn):
4151
    return self.oq.OldStyleQuery(self)
4152

    
4153

    
4154
class LUNodeRemove(LogicalUnit):
4155
  """Logical unit for removing a node.
4156

4157
  """
4158
  HPATH = "node-remove"
4159
  HTYPE = constants.HTYPE_NODE
4160

    
4161
  def BuildHooksEnv(self):
4162
    """Build hooks env.
4163

4164
    This doesn't run on the target node in the pre phase as a failed
4165
    node would then be impossible to remove.
4166

4167
    """
4168
    return {
4169
      "OP_TARGET": self.op.node_name,
4170
      "NODE_NAME": self.op.node_name,
4171
      }
4172

    
4173
  def BuildHooksNodes(self):
4174
    """Build hooks nodes.
4175

4176
    """
4177
    all_nodes = self.cfg.GetNodeList()
4178
    try:
4179
      all_nodes.remove(self.op.node_name)
4180
    except ValueError:
4181
      logging.warning("Node '%s', which is about to be removed, was not found"
4182
                      " in the list of all nodes", self.op.node_name)
4183
    return (all_nodes, all_nodes)
4184

    
4185
  def CheckPrereq(self):
4186
    """Check prerequisites.
4187

4188
    This checks:
4189
     - the node exists in the configuration
4190
     - it does not have primary or secondary instances
4191
     - it's not the master
4192

4193
    Any errors are signaled by raising errors.OpPrereqError.
4194

4195
    """
4196
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4197
    node = self.cfg.GetNodeInfo(self.op.node_name)
4198
    assert node is not None
4199

    
4200
    instance_list = self.cfg.GetInstanceList()
4201

    
4202
    masternode = self.cfg.GetMasterNode()
4203
    if node.name == masternode:
4204
      raise errors.OpPrereqError("Node is the master node, failover to another"
4205
                                 " node is required", errors.ECODE_INVAL)
4206

    
4207
    for instance_name in instance_list:
4208
      instance = self.cfg.GetInstanceInfo(instance_name)
4209
      if node.name in instance.all_nodes:
4210
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4211
                                   " please remove first" % instance_name,
4212
                                   errors.ECODE_INVAL)
4213
    self.op.node_name = node.name
4214
    self.node = node
4215

    
4216
  def Exec(self, feedback_fn):
4217
    """Removes the node from the cluster.
4218

4219
    """
4220
    node = self.node
4221
    logging.info("Stopping the node daemon and removing configs from node %s",
4222
                 node.name)
4223

    
4224
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4225

    
4226
    # Promote nodes to master candidate as needed
4227
    _AdjustCandidatePool(self, exceptions=[node.name])
4228
    self.context.RemoveNode(node.name)
4229

    
4230
    # Run post hooks on the node before it's removed
4231
    _RunPostHook(self, node.name)
4232

    
4233
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4234
    msg = result.fail_msg
4235
    if msg:
4236
      self.LogWarning("Errors encountered on the remote node while leaving"
4237
                      " the cluster: %s", msg)
4238

    
4239
    # Remove node from our /etc/hosts
4240
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4241
      master_node = self.cfg.GetMasterNode()
4242
      result = self.rpc.call_etc_hosts_modify(master_node,
4243
                                              constants.ETC_HOSTS_REMOVE,
4244
                                              node.name, None)
4245
      result.Raise("Can't update hosts file with new host data")
4246
      _RedistributeAncillaryFiles(self)
4247

    
4248

    
4249
class _NodeQuery(_QueryBase):
4250
  FIELDS = query.NODE_FIELDS
4251

    
4252
  def ExpandNames(self, lu):
4253
    lu.needed_locks = {}
4254
    lu.share_locks[locking.LEVEL_NODE] = 1
4255

    
4256
    if self.names:
4257
      self.wanted = _GetWantedNodes(lu, self.names)
4258
    else:
4259
      self.wanted = locking.ALL_SET
4260

    
4261
    self.do_locking = (self.use_locking and
4262
                       query.NQ_LIVE in self.requested_data)
4263

    
4264
    if self.do_locking:
4265
      # if we don't request only static fields, we need to lock the nodes
4266
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4267

    
4268
  def DeclareLocks(self, lu, level):
4269
    pass
4270

    
4271
  def _GetQueryData(self, lu):
4272
    """Computes the list of nodes and their attributes.
4273

4274
    """
4275
    all_info = lu.cfg.GetAllNodesInfo()
4276

    
4277
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4278

    
4279
    # Gather data as requested
4280
    if query.NQ_LIVE in self.requested_data:
4281
      # filter out non-vm_capable nodes
4282
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4283

    
4284
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4285
                                        lu.cfg.GetHypervisorType())
4286
      live_data = dict((name, nresult.payload)
4287
                       for (name, nresult) in node_data.items()
4288
                       if not nresult.fail_msg and nresult.payload)
4289
    else:
4290
      live_data = None
4291

    
4292
    if query.NQ_INST in self.requested_data:
4293
      node_to_primary = dict([(name, set()) for name in nodenames])
4294
      node_to_secondary = dict([(name, set()) for name in nodenames])
4295

    
4296
      inst_data = lu.cfg.GetAllInstancesInfo()
4297

    
4298
      for inst in inst_data.values():
4299
        if inst.primary_node in node_to_primary:
4300
          node_to_primary[inst.primary_node].add(inst.name)
4301
        for secnode in inst.secondary_nodes:
4302
          if secnode in node_to_secondary:
4303
            node_to_secondary[secnode].add(inst.name)
4304
    else:
4305
      node_to_primary = None
4306
      node_to_secondary = None
4307

    
4308
    if query.NQ_OOB in self.requested_data:
4309
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4310
                         for name, node in all_info.iteritems())
4311
    else:
4312
      oob_support = None
4313

    
4314
    if query.NQ_GROUP in self.requested_data:
4315
      groups = lu.cfg.GetAllNodeGroupsInfo()
4316
    else:
4317
      groups = {}
4318

    
4319
    return query.NodeQueryData([all_info[name] for name in nodenames],
4320
                               live_data, lu.cfg.GetMasterNode(),
4321
                               node_to_primary, node_to_secondary, groups,
4322
                               oob_support, lu.cfg.GetClusterInfo())
4323

    
4324

    
4325
class LUNodeQuery(NoHooksLU):
4326
  """Logical unit for querying nodes.
4327

4328
  """
4329
  # pylint: disable-msg=W0142
4330
  REQ_BGL = False
4331

    
4332
  def CheckArguments(self):
4333
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4334
                         self.op.output_fields, self.op.use_locking)
4335

    
4336
  def ExpandNames(self):
4337
    self.nq.ExpandNames(self)
4338

    
4339
  def Exec(self, feedback_fn):
4340
    return self.nq.OldStyleQuery(self)
4341

    
4342

    
4343
class LUNodeQueryvols(NoHooksLU):
4344
  """Logical unit for getting volumes on node(s).
4345

4346
  """
4347
  REQ_BGL = False
4348
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4349
  _FIELDS_STATIC = utils.FieldSet("node")
4350

    
4351
  def CheckArguments(self):
4352
    _CheckOutputFields(static=self._FIELDS_STATIC,
4353
                       dynamic=self._FIELDS_DYNAMIC,
4354
                       selected=self.op.output_fields)
4355

    
4356
  def ExpandNames(self):
4357
    self.needed_locks = {}
4358
    self.share_locks[locking.LEVEL_NODE] = 1
4359
    if not self.op.nodes:
4360
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4361
    else:
4362
      self.needed_locks[locking.LEVEL_NODE] = \
4363
        _GetWantedNodes(self, self.op.nodes)
4364

    
4365
  def Exec(self, feedback_fn):
4366
    """Computes the list of nodes and their attributes.
4367

4368
    """
4369
    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
4370
    volumes = self.rpc.call_node_volumes(nodenames)
4371

    
4372
    ilist = self.cfg.GetAllInstancesInfo()
4373
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4374

    
4375
    output = []
4376
    for node in nodenames:
4377
      nresult = volumes[node]
4378
      if nresult.offline:
4379
        continue
4380
      msg = nresult.fail_msg
4381
      if msg:
4382
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4383
        continue
4384

    
4385
      node_vols = sorted(nresult.payload,
4386
                         key=operator.itemgetter("dev"))
4387

    
4388
      for vol in node_vols:
4389
        node_output = []
4390
        for field in self.op.output_fields:
4391
          if field == "node":
4392
            val = node
4393
          elif field == "phys":
4394
            val = vol["dev"]
4395
          elif field == "vg":
4396
            val = vol["vg"]
4397
          elif field == "name":
4398
            val = vol["name"]
4399
          elif field == "size":
4400
            val = int(float(vol["size"]))
4401
          elif field == "instance":
4402
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4403
          else:
4404
            raise errors.ParameterError(field)
4405
          node_output.append(str(val))
4406

    
4407
        output.append(node_output)
4408

    
4409
    return output
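
  # Illustrative sketch (added; values are hypothetical): with
  # output_fields=["node", "phys", "vg", "name", "size", "instance"], each
  # row appended above is a list of strings such as
  #   ["node1.example.com", "/dev/sdb1", "xenvg", "instance1-disk0", "10240",
  #    "instance1.example.com"]
  # so the caller receives one row per logical volume per online node.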
4410

    
4411

    
4412
class LUNodeQueryStorage(NoHooksLU):
4413
  """Logical unit for getting information on storage units on node(s).
4414

4415
  """
4416
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4417
  REQ_BGL = False
4418

    
4419
  def CheckArguments(self):
4420
    _CheckOutputFields(static=self._FIELDS_STATIC,
4421
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4422
                       selected=self.op.output_fields)
4423

    
4424
  def ExpandNames(self):
4425
    self.needed_locks = {}
4426
    self.share_locks[locking.LEVEL_NODE] = 1
4427

    
4428
    if self.op.nodes:
4429
      self.needed_locks[locking.LEVEL_NODE] = \
4430
        _GetWantedNodes(self, self.op.nodes)
4431
    else:
4432
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4433

    
4434
  def Exec(self, feedback_fn):
4435
    """Computes the list of nodes and their attributes.
4436

4437
    """
4438
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
4439

    
4440
    # Always get name to sort by
4441
    if constants.SF_NAME in self.op.output_fields:
4442
      fields = self.op.output_fields[:]
4443
    else:
4444
      fields = [constants.SF_NAME] + self.op.output_fields
4445

    
4446
    # Never ask for node or type as it's only known to the LU
4447
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4448
      while extra in fields:
4449
        fields.remove(extra)
4450

    
4451
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4452
    name_idx = field_idx[constants.SF_NAME]
4453

    
4454
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4455
    data = self.rpc.call_storage_list(self.nodes,
4456
                                      self.op.storage_type, st_args,
4457
                                      self.op.name, fields)
4458

    
4459
    result = []
4460

    
4461
    for node in utils.NiceSort(self.nodes):
4462
      nresult = data[node]
4463
      if nresult.offline:
4464
        continue
4465

    
4466
      msg = nresult.fail_msg
4467
      if msg:
4468
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4469
        continue
4470

    
4471
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4472

    
4473
      for name in utils.NiceSort(rows.keys()):
4474
        row = rows[name]
4475

    
4476
        out = []
4477

    
4478
        for field in self.op.output_fields:
4479
          if field == constants.SF_NODE:
4480
            val = node
4481
          elif field == constants.SF_TYPE:
4482
            val = self.op.storage_type
4483
          elif field in field_idx:
4484
            val = row[field_idx[field]]
4485
          else:
4486
            raise errors.ParameterError(field)
4487

    
4488
          out.append(val)
4489

    
4490
        result.append(out)
4491

    
4492
    return result
4493

    
4494

    
4495
class _InstanceQuery(_QueryBase):
4496
  FIELDS = query.INSTANCE_FIELDS
4497

    
4498
  def ExpandNames(self, lu):
4499
    lu.needed_locks = {}
4500
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
4501
    lu.share_locks[locking.LEVEL_NODE] = 1
4502

    
4503
    if self.names:
4504
      self.wanted = _GetWantedInstances(lu, self.names)
4505
    else:
4506
      self.wanted = locking.ALL_SET
4507

    
4508
    self.do_locking = (self.use_locking and
4509
                       query.IQ_LIVE in self.requested_data)
4510
    if self.do_locking:
4511
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4512
      lu.needed_locks[locking.LEVEL_NODE] = []
4513
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4514

    
4515
  def DeclareLocks(self, lu, level):
4516
    if level == locking.LEVEL_NODE and self.do_locking:
4517
      lu._LockInstancesNodes() # pylint: disable-msg=W0212
4518

    
4519
  def _GetQueryData(self, lu):
4520
    """Computes the list of instances and their attributes.
4521

4522
    """
4523
    cluster = lu.cfg.GetClusterInfo()
4524
    all_info = lu.cfg.GetAllInstancesInfo()
4525

    
4526
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4527

    
4528
    instance_list = [all_info[name] for name in instance_names]
4529
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4530
                                        for inst in instance_list)))
4531
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4532
    bad_nodes = []
4533
    offline_nodes = []
4534
    wrongnode_inst = set()
4535

    
4536
    # Gather data as requested
4537
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4538
      live_data = {}
4539
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4540
      for name in nodes:
4541
        result = node_data[name]
4542
        if result.offline:
4543
          # offline nodes will be in both lists
4544
          assert result.fail_msg
4545
          offline_nodes.append(name)
4546
        if result.fail_msg:
4547
          bad_nodes.append(name)
4548
        elif result.payload:
4549
          for inst in result.payload:
4550
            if inst in all_info:
4551
              if all_info[inst].primary_node == name:
4552
                live_data.update(result.payload)
4553
              else:
4554
                wrongnode_inst.add(inst)
4555
            else:
4556
              # orphan instance; we don't list it here as we don't
4557
              # handle this case yet in the output of instance listing
4558
              logging.warning("Orphan instance '%s' found on node %s",
4559
                              inst, name)
4560
        # else no instance is alive
4561
    else:
4562
      live_data = {}
4563

    
4564
    if query.IQ_DISKUSAGE in self.requested_data:
4565
      disk_usage = dict((inst.name,
4566
                         _ComputeDiskSize(inst.disk_template,
4567
                                          [{constants.IDISK_SIZE: disk.size}
4568
                                           for disk in inst.disks]))
4569
                        for inst in instance_list)
4570
    else:
4571
      disk_usage = None
4572

    
4573
    if query.IQ_CONSOLE in self.requested_data:
4574
      consinfo = {}
4575
      for inst in instance_list:
4576
        if inst.name in live_data:
4577
          # Instance is running
4578
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4579
        else:
4580
          consinfo[inst.name] = None
4581
      assert set(consinfo.keys()) == set(instance_names)
4582
    else:
4583
      consinfo = None
4584

    
4585
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4586
                                   disk_usage, offline_nodes, bad_nodes,
4587
                                   live_data, wrongnode_inst, consinfo)
4588

    
4589

    
4590
class LUQuery(NoHooksLU):
4591
  """Query for resources/items of a certain kind.
4592

4593
  """
4594
  # pylint: disable-msg=W0142
4595
  REQ_BGL = False
4596

    
4597
  def CheckArguments(self):
4598
    qcls = _GetQueryImplementation(self.op.what)
4599

    
4600
    self.impl = qcls(self.op.filter, self.op.fields, False)
4601

    
4602
  def ExpandNames(self):
4603
    self.impl.ExpandNames(self)
4604

    
4605
  def DeclareLocks(self, level):
4606
    self.impl.DeclareLocks(self, level)
4607

    
4608
  def Exec(self, feedback_fn):
4609
    return self.impl.NewStyleQuery(self)
4610

    
4611

    
4612
class LUQueryFields(NoHooksLU):
4613
  """Query for resources/items of a certain kind.
4614

4615
  """
4616
  # pylint: disable-msg=W0142
4617
  REQ_BGL = False
4618

    
4619
  def CheckArguments(self):
4620
    self.qcls = _GetQueryImplementation(self.op.what)
4621

    
4622
  def ExpandNames(self):
4623
    self.needed_locks = {}
4624

    
4625
  def Exec(self, feedback_fn):
4626
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4627

    
4628

    
4629
class LUNodeModifyStorage(NoHooksLU):
4630
  """Logical unit for modifying a storage volume on a node.
4631

4632
  """
4633
  REQ_BGL = False
4634

    
4635
  def CheckArguments(self):
4636
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4637

    
4638
    storage_type = self.op.storage_type
4639

    
4640
    try:
4641
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4642
    except KeyError:
4643
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4644
                                 " modified" % storage_type,
4645
                                 errors.ECODE_INVAL)
4646

    
4647
    diff = set(self.op.changes.keys()) - modifiable
4648
    if diff:
4649
      raise errors.OpPrereqError("The following fields can not be modified for"
4650
                                 " storage units of type '%s': %r" %
4651
                                 (storage_type, list(diff)),
4652
                                 errors.ECODE_INVAL)
4653

    
4654
  def ExpandNames(self):
4655
    self.needed_locks = {
4656
      locking.LEVEL_NODE: self.op.node_name,
4657
      }
4658

    
4659
  def Exec(self, feedback_fn):
    """Modifies a storage volume on a node.
4661

4662
    """
4663
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4664
    result = self.rpc.call_storage_modify(self.op.node_name,
4665
                                          self.op.storage_type, st_args,
4666
                                          self.op.name, self.op.changes)
4667
    result.Raise("Failed to modify storage unit '%s' on %s" %
4668
                 (self.op.name, self.op.node_name))
4669

    
4670

    
4671
class LUNodeAdd(LogicalUnit):
4672
  """Logical unit for adding node to the cluster.
4673

4674
  """
4675
  HPATH = "node-add"
4676
  HTYPE = constants.HTYPE_NODE
4677
  _NFLAGS = ["master_capable", "vm_capable"]
4678

    
4679
  def CheckArguments(self):
4680
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4681
    # validate/normalize the node name
4682
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4683
                                         family=self.primary_ip_family)
4684
    self.op.node_name = self.hostname.name
4685

    
4686
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4687
      raise errors.OpPrereqError("Cannot readd the master node",
4688
                                 errors.ECODE_STATE)
4689

    
4690
    if self.op.readd and self.op.group:
4691
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4692
                                 " being readded", errors.ECODE_INVAL)
4693

    
4694
  def BuildHooksEnv(self):
4695
    """Build hooks env.
4696

4697
    This will run on all nodes before, and on all nodes + the new node after.
4698

4699
    """
4700
    return {
4701
      "OP_TARGET": self.op.node_name,
4702
      "NODE_NAME": self.op.node_name,
4703
      "NODE_PIP": self.op.primary_ip,
4704
      "NODE_SIP": self.op.secondary_ip,
4705
      "MASTER_CAPABLE": str(self.op.master_capable),
4706
      "VM_CAPABLE": str(self.op.vm_capable),
4707
      }
4708

    
4709
  def BuildHooksNodes(self):
4710
    """Build hooks nodes.
4711

4712
    """
4713
    # Exclude added node
4714
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4715
    post_nodes = pre_nodes + [self.op.node_name, ]
4716

    
4717
    return (pre_nodes, post_nodes)
4718

    
4719
  def CheckPrereq(self):
4720
    """Check prerequisites.
4721

4722
    This checks:
4723
     - the new node is not already in the config
4724
     - it is resolvable
4725
     - its parameters (single/dual homed) matches the cluster
4726

4727
    Any errors are signaled by raising errors.OpPrereqError.
4728

4729
    """
4730
    cfg = self.cfg
4731
    hostname = self.hostname
4732
    node = hostname.name
4733
    primary_ip = self.op.primary_ip = hostname.ip
4734
    if self.op.secondary_ip is None:
4735
      if self.primary_ip_family == netutils.IP6Address.family:
4736
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4737
                                   " IPv4 address must be given as secondary",
4738
                                   errors.ECODE_INVAL)
4739
      self.op.secondary_ip = primary_ip
4740

    
4741
    secondary_ip = self.op.secondary_ip
4742
    if not netutils.IP4Address.IsValid(secondary_ip):
4743
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4744
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4745

    
4746
    node_list = cfg.GetNodeList()
4747
    if not self.op.readd and node in node_list:
4748
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4749
                                 node, errors.ECODE_EXISTS)
4750
    elif self.op.readd and node not in node_list:
4751
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4752
                                 errors.ECODE_NOENT)
4753

    
4754
    self.changed_primary_ip = False
4755

    
4756
    for existing_node_name in node_list:
4757
      existing_node = cfg.GetNodeInfo(existing_node_name)
4758

    
4759
      if self.op.readd and node == existing_node_name:
4760
        if existing_node.secondary_ip != secondary_ip:
4761
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4762
                                     " address configuration as before",
4763
                                     errors.ECODE_INVAL)
4764
        if existing_node.primary_ip != primary_ip:
4765
          self.changed_primary_ip = True
4766

    
4767
        continue
4768

    
4769
      if (existing_node.primary_ip == primary_ip or
4770
          existing_node.secondary_ip == primary_ip or
4771
          existing_node.primary_ip == secondary_ip or
4772
          existing_node.secondary_ip == secondary_ip):
4773
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4774
                                   " existing node %s" % existing_node.name,
4775
                                   errors.ECODE_NOTUNIQUE)
4776

    
4777
    # After this 'if' block, None is no longer a valid value for the
4778
    # _capable op attributes
4779
    if self.op.readd:
4780
      old_node = self.cfg.GetNodeInfo(node)
4781
      assert old_node is not None, "Can't retrieve locked node %s" % node
4782
      for attr in self._NFLAGS:
4783
        if getattr(self.op, attr) is None:
4784
          setattr(self.op, attr, getattr(old_node, attr))
4785
    else:
4786
      for attr in self._NFLAGS:
4787
        if getattr(self.op, attr) is None:
4788
          setattr(self.op, attr, True)
4789

    
4790
    if self.op.readd and not self.op.vm_capable:
4791
      pri, sec = cfg.GetNodeInstances(node)
4792
      if pri or sec:
4793
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4794
                                   " flag set to false, but it already holds"
4795
                                   " instances" % node,
4796
                                   errors.ECODE_STATE)
4797

    
4798
    # check that the type of the node (single versus dual homed) is the
4799
    # same as for the master
4800
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4801
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4802
    newbie_singlehomed = secondary_ip == primary_ip
4803
    if master_singlehomed != newbie_singlehomed:
4804
      if master_singlehomed:
4805
        raise errors.OpPrereqError("The master has no secondary ip but the"
4806
                                   " new node has one",
4807
                                   errors.ECODE_INVAL)
4808
      else:
4809
        raise errors.OpPrereqError("The master has a secondary ip but the"
4810
                                   " new node doesn't have one",
4811
                                   errors.ECODE_INVAL)
4812

    
4813
    # checks reachability
4814
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4815
      raise errors.OpPrereqError("Node not reachable by ping",
4816
                                 errors.ECODE_ENVIRON)
4817

    
4818
    if not newbie_singlehomed:
4819
      # check reachability from my secondary ip to newbie's secondary ip
4820
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4821
                           source=myself.secondary_ip):
4822
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4823
                                   " based ping to node daemon port",
4824
                                   errors.ECODE_ENVIRON)
4825

    
4826
    if self.op.readd:
4827
      exceptions = [node]
4828
    else:
4829
      exceptions = []
4830

    
4831
    if self.op.master_capable:
4832
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4833
    else:
4834
      self.master_candidate = False
4835

    
4836
    if self.op.readd:
4837
      self.new_node = old_node
4838
    else:
4839
      node_group = cfg.LookupNodeGroup(self.op.group)
4840
      self.new_node = objects.Node(name=node,
4841
                                   primary_ip=primary_ip,
4842
                                   secondary_ip=secondary_ip,
4843
                                   master_candidate=self.master_candidate,
4844
                                   offline=False, drained=False,
4845
                                   group=node_group)
4846

    
4847
    if self.op.ndparams:
4848
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4849

    
4850
  def Exec(self, feedback_fn):
4851
    """Adds the new node to the cluster.
4852

4853
    """
4854
    new_node = self.new_node
4855
    node = new_node.name
4856

    
4857
    # We are adding a new node, so we assume it is powered
4858
    new_node.powered = True
4859

    
4860
    # for re-adds, reset the offline/drained/master-candidate flags;
4861
    # we need to reset here, otherwise offline would prevent RPC calls
4862
    # later in the procedure; this also means that if the re-add
4863
    # fails, we are left with a non-offlined, broken node
4864
    if self.op.readd:
4865
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4866
      self.LogInfo("Readding a node, the offline/drained flags were reset")
4867
      # if we demote the node, we do cleanup later in the procedure
4868
      new_node.master_candidate = self.master_candidate
4869
      if self.changed_primary_ip:
4870
        new_node.primary_ip = self.op.primary_ip
4871

    
4872
    # copy the master/vm_capable flags
4873
    for attr in self._NFLAGS:
4874
      setattr(new_node, attr, getattr(self.op, attr))
4875

    
4876
    # notify the user about any possible mc promotion
4877
    if new_node.master_candidate:
4878
      self.LogInfo("Node will be a master candidate")
4879

    
4880
    if self.op.ndparams:
4881
      new_node.ndparams = self.op.ndparams
4882
    else:
4883
      new_node.ndparams = {}
4884

    
4885
    # check connectivity
4886
    result = self.rpc.call_version([node])[node]
4887
    result.Raise("Can't get version information from node %s" % node)
4888
    if constants.PROTOCOL_VERSION == result.payload:
4889
      logging.info("Communication to node %s fine, sw version %s match",
4890
                   node, result.payload)
4891
    else:
4892
      raise errors.OpExecError("Version mismatch master version %s,"
4893
                               " node version %s" %
4894
                               (constants.PROTOCOL_VERSION, result.payload))
4895

    
4896
    # Add node to our /etc/hosts, and add key to known_hosts
4897
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4898
      master_node = self.cfg.GetMasterNode()
4899
      result = self.rpc.call_etc_hosts_modify(master_node,
4900
                                              constants.ETC_HOSTS_ADD,
4901
                                              self.hostname.name,
4902
                                              self.hostname.ip)
4903
      result.Raise("Can't update hosts file with new host data")
4904

    
4905
    if new_node.secondary_ip != new_node.primary_ip:
4906
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4907
                               False)
4908

    
4909
    node_verify_list = [self.cfg.GetMasterNode()]
4910
    node_verify_param = {
4911
      constants.NV_NODELIST: [node],
4912
      # TODO: do a node-net-test as well?
4913
    }
4914

    
4915
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4916
                                       self.cfg.GetClusterName())
4917
    for verifier in node_verify_list:
4918
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
4919
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
4920
      if nl_payload:
4921
        for failed in nl_payload:
4922
          feedback_fn("ssh/hostname verification failed"
4923
                      " (checking from %s): %s" %
4924
                      (verifier, nl_payload[failed]))
4925
        raise errors.OpExecError("ssh/hostname verification failed")
4926

    
4927
    if self.op.readd:
4928
      _RedistributeAncillaryFiles(self)
4929
      self.context.ReaddNode(new_node)
4930
      # make sure we redistribute the config
4931
      self.cfg.Update(new_node, feedback_fn)
4932
      # and make sure the new node will not have old files around
4933
      if not new_node.master_candidate:
4934
        result = self.rpc.call_node_demote_from_mc(new_node.name)
4935
        msg = result.fail_msg
4936
        if msg:
4937
          self.LogWarning("Node failed to demote itself from master"
4938
                          " candidate status: %s" % msg)
4939
    else:
4940
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
4941
                                  additional_vm=self.op.vm_capable)
4942
      self.context.AddNode(new_node, self.proc.GetECId())
4943

    
4944

    
4945
class LUNodeSetParams(LogicalUnit):
4946
  """Modifies the parameters of a node.
4947

4948
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4949
      to the node role (as _ROLE_*)
4950
  @cvar _R2F: a dictionary from node role to tuples of flags
4951
  @cvar _FLAGS: a list of attribute names corresponding to the flags
4952

4953
  """
4954
  HPATH = "node-modify"
4955
  HTYPE = constants.HTYPE_NODE
4956
  REQ_BGL = False
4957
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4958
  _F2R = {
4959
    (True, False, False): _ROLE_CANDIDATE,
4960
    (False, True, False): _ROLE_DRAINED,
4961
    (False, False, True): _ROLE_OFFLINE,
4962
    (False, False, False): _ROLE_REGULAR,
4963
    }
4964
  _R2F = dict((v, k) for k, v in _F2R.items())
4965
  _FLAGS = ["master_candidate", "drained", "offline"]
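
  # Illustrative example (added; not from the original source): with the
  # mapping above, a node whose (master_candidate, drained, offline) flags
  # are (True, False, False) maps to _ROLE_CANDIDATE, and
  # _R2F[_ROLE_DRAINED] gives back (False, True, False); _FLAGS lists the
  # node attributes in the same order as the tuple elements.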
4966

    
4967
  def CheckArguments(self):
4968
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4969
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4970
                self.op.master_capable, self.op.vm_capable,
4971
                self.op.secondary_ip, self.op.ndparams]
4972
    if all_mods.count(None) == len(all_mods):
4973
      raise errors.OpPrereqError("Please pass at least one modification",
4974
                                 errors.ECODE_INVAL)
4975
    if all_mods.count(True) > 1:
4976
      raise errors.OpPrereqError("Can't set the node into more than one"
4977
                                 " state at the same time",
4978
                                 errors.ECODE_INVAL)
4979

    
4980
    # Boolean value that tells us whether we might be demoting from MC
4981
    self.might_demote = (self.op.master_candidate == False or
4982
                         self.op.offline == True or
4983
                         self.op.drained == True or
4984
                         self.op.master_capable == False)
4985

    
4986
    if self.op.secondary_ip:
4987
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4988
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4989
                                   " address" % self.op.secondary_ip,
4990
                                   errors.ECODE_INVAL)
4991

    
4992
    self.lock_all = self.op.auto_promote and self.might_demote
4993
    self.lock_instances = self.op.secondary_ip is not None
4994

    
4995
  def ExpandNames(self):
4996
    if self.lock_all:
4997
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4998
    else:
4999
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5000

    
5001
    if self.lock_instances:
5002
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5003

    
5004
  def DeclareLocks(self, level):
5005
    # If we have locked all instances, before waiting to lock nodes, release
5006
    # all the ones living on nodes unrelated to the current operation.
5007
    if level == locking.LEVEL_NODE and self.lock_instances:
5008
      self.affected_instances = []
5009
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5010
        instances_keep = []
5011

    
5012
        # Build list of instances to release
5013
        for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
5014
          instance = self.context.cfg.GetInstanceInfo(instance_name)
5015
          if (instance.disk_template in constants.DTS_INT_MIRROR and
5016
              self.op.node_name in instance.all_nodes):
5017
            instances_keep.append(instance_name)
5018
            self.affected_instances.append(instance)
5019

    
5020
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5021

    
5022
        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
5023
                set(instances_keep))
5024

    
5025
  def BuildHooksEnv(self):
5026
    """Build hooks env.
5027

5028
    This runs on the master node.
5029

5030
    """
5031
    return {
5032
      "OP_TARGET": self.op.node_name,
5033
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5034
      "OFFLINE": str(self.op.offline),
5035
      "DRAINED": str(self.op.drained),
5036
      "MASTER_CAPABLE": str(self.op.master_capable),
5037
      "VM_CAPABLE": str(self.op.vm_capable),
5038
      }
5039

    
5040
  def BuildHooksNodes(self):
5041
    """Build hooks nodes.
5042

5043
    """
5044
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5045
    return (nl, nl)
5046

    
5047
  def CheckPrereq(self):
5048
    """Check prerequisites.
5049

5050
    This only checks the instance list against the existing names.
5051

5052
    """
5053
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5054

    
5055
    if (self.op.master_candidate is not None or
5056
        self.op.drained is not None or
5057
        self.op.offline is not None):
5058
      # we can't change the master's node flags
5059
      if self.op.node_name == self.cfg.GetMasterNode():
5060
        raise errors.OpPrereqError("The master role can be changed"
5061
                                   " only via master-failover",
5062
                                   errors.ECODE_INVAL)
5063

    
5064
    if self.op.master_candidate and not node.master_capable:
5065
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5066
                                 " it a master candidate" % node.name,
5067
                                 errors.ECODE_STATE)
5068

    
5069
    if self.op.vm_capable == False:
5070
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5071
      if ipri or isec:
5072
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5073
                                   " the vm_capable flag" % node.name,
5074
                                   errors.ECODE_STATE)
5075

    
5076
    if node.master_candidate and self.might_demote and not self.lock_all:
5077
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5078
      # check if after removing the current node, we're missing master
5079
      # candidates
5080
      (mc_remaining, mc_should, _) = \
5081
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5082
      if mc_remaining < mc_should:
5083
        raise errors.OpPrereqError("Not enough master candidates, please"
5084
                                   " pass auto promote option to allow"
5085
                                   " promotion", errors.ECODE_STATE)
5086

    
5087
    self.old_flags = old_flags = (node.master_candidate,
5088
                                  node.drained, node.offline)
5089
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5090
    self.old_role = old_role = self._F2R[old_flags]
5091

    
5092
    # Check for ineffective changes
5093
    for attr in self._FLAGS:
5094
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5095
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5096
        setattr(self.op, attr, None)
5097

    
5098
    # Past this point, any flag change to False means a transition
5099
    # away from the respective state, as only real changes are kept
5100

    
5101
    # TODO: We might query the real power state if it supports OOB
5102
    if _SupportsOob(self.cfg, node):
5103
      if self.op.offline is False and not (node.powered or
5104
                                           self.op.powered == True):
5105
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5106
                                    " offline status can be reset") %
5107
                                   self.op.node_name)
5108
    elif self.op.powered is not None:
5109
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5110
                                  " as it does not support out-of-band"
5111
                                  " handling") % self.op.node_name)
5112

    
5113
    # If we're being deofflined/drained, we'll MC ourself if needed
5114
    if (self.op.drained == False or self.op.offline == False or
5115
        (self.op.master_capable and not node.master_capable)):
5116
      if _DecideSelfPromotion(self):
5117
        self.op.master_candidate = True
5118
        self.LogInfo("Auto-promoting node to master candidate")
5119

    
5120
    # If we're no longer master capable, we'll demote ourselves from MC
5121
    if self.op.master_capable == False and node.master_candidate:
5122
      self.LogInfo("Demoting from master candidate")
5123
      self.op.master_candidate = False
5124

    
5125
    # Compute new role
5126
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5127
    if self.op.master_candidate:
5128
      new_role = self._ROLE_CANDIDATE
5129
    elif self.op.drained:
5130
      new_role = self._ROLE_DRAINED
5131
    elif self.op.offline:
5132
      new_role = self._ROLE_OFFLINE
5133
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5134
      # False is still in new flags, which means we're un-setting (the
5135
      # only) True flag
5136
      new_role = self._ROLE_REGULAR
5137
    else: # no new flags, nothing, keep old role
5138
      new_role = old_role
5139

    
5140
    self.new_role = new_role
5141

    
5142
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5143
      # Trying to transition out of offline status
5144
      result = self.rpc.call_version([node.name])[node.name]
5145
      if result.fail_msg:
5146
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5147
                                   " to report its version: %s" %
5148
                                   (node.name, result.fail_msg),
5149
                                   errors.ECODE_STATE)
5150
      else:
5151
        self.LogWarning("Transitioning node from offline to online state"
5152
                        " without using re-add. Please make sure the node"
5153
                        " is healthy!")
5154

    
5155
    if self.op.secondary_ip:
5156
      # Ok even without locking, because this can't be changed by any LU
5157
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5158
      master_singlehomed = master.secondary_ip == master.primary_ip
5159
      if master_singlehomed and self.op.secondary_ip:
5160
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5161
                                   " homed cluster", errors.ECODE_INVAL)
5162

    
5163
      if node.offline:
5164
        if self.affected_instances:
5165
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5166
                                     " node has instances (%s) configured"
5167
                                     " to use it" % self.affected_instances)
5168
      else:
5169
        # On online nodes, check that no instances are running, and that
5170
        # the node has the new ip and we can reach it.
5171
        for instance in self.affected_instances:
5172
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5173

    
5174
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5175
        if master.name != node.name:
5176
          # check reachability from master secondary ip to new secondary ip
5177
          if not netutils.TcpPing(self.op.secondary_ip,
5178
                                  constants.DEFAULT_NODED_PORT,
5179
                                  source=master.secondary_ip):
5180
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5181
                                       " based ping to node daemon port",
5182
                                       errors.ECODE_ENVIRON)
5183

    
5184
    if self.op.ndparams:
5185
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5186
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5187
      self.new_ndparams = new_ndparams
5188

    
5189
  def Exec(self, feedback_fn):
5190
    """Modifies a node.
5191

5192
    """
5193
    node = self.node
5194
    old_role = self.old_role
5195
    new_role = self.new_role
5196

    
5197
    result = []
5198

    
5199
    if self.op.ndparams:
5200
      node.ndparams = self.new_ndparams
5201

    
5202
    if self.op.powered is not None:
5203
      node.powered = self.op.powered
5204

    
5205
    for attr in ["master_capable", "vm_capable"]:
5206
      val = getattr(self.op, attr)
5207
      if val is not None:
5208
        setattr(node, attr, val)
5209
        result.append((attr, str(val)))
5210

    
5211
    if new_role != old_role:
5212
      # Tell the node to demote itself, if no longer MC and not offline
5213
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5214
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5215
        if msg:
5216
          self.LogWarning("Node failed to demote itself: %s", msg)
5217

    
5218
      new_flags = self._R2F[new_role]
5219
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5220
        if of != nf:
5221
          result.append((desc, str(nf)))
5222
      (node.master_candidate, node.drained, node.offline) = new_flags
5223

    
5224
      # we locked all nodes, we adjust the CP before updating this node
5225
      if self.lock_all:
5226
        _AdjustCandidatePool(self, [node.name])
5227

    
5228
    if self.op.secondary_ip:
5229
      node.secondary_ip = self.op.secondary_ip
5230
      result.append(("secondary_ip", self.op.secondary_ip))
5231

    
5232
    # this will trigger configuration file update, if needed
5233
    self.cfg.Update(node, feedback_fn)
5234

    
5235
    # this will trigger job queue propagation or cleanup if the mc
5236
    # flag changed
5237
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5238
      self.context.ReaddNode(node)
5239

    
5240
    return result
5241

    
5242

    
5243
class LUNodePowercycle(NoHooksLU):
5244
  """Powercycles a node.
5245

5246
  """
5247
  REQ_BGL = False
5248

    
5249
  def CheckArguments(self):
5250
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5251
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5252
      raise errors.OpPrereqError("The node is the master and the force"
5253
                                 " parameter was not set",
5254
                                 errors.ECODE_INVAL)
5255

    
5256
  def ExpandNames(self):
5257
    """Locking for PowercycleNode.
5258

5259
    This is a last-resort option and shouldn't block on other
5260
    jobs. Therefore, we grab no locks.
5261

5262
    """
5263
    self.needed_locks = {}
5264

    
5265
  def Exec(self, feedback_fn):
5266
    """Reboots a node.
5267

5268
    """
5269
    result = self.rpc.call_node_powercycle(self.op.node_name,
5270
                                           self.cfg.GetHypervisorType())
5271
    result.Raise("Failed to schedule the reboot")
5272
    return result.payload
5273

    
5274

    
5275
class LUClusterQuery(NoHooksLU):
5276
  """Query cluster configuration.
5277

5278
  """
5279
  REQ_BGL = False
5280

    
5281
  def ExpandNames(self):
5282
    self.needed_locks = {}
5283

    
5284
  def Exec(self, feedback_fn):
5285
    """Return cluster config.
5286

5287
    """
5288
    cluster = self.cfg.GetClusterInfo()
5289
    os_hvp = {}
5290

    
5291
    # Filter just for enabled hypervisors
5292
    for os_name, hv_dict in cluster.os_hvp.items():
5293
      os_hvp[os_name] = {}
5294
      for hv_name, hv_params in hv_dict.items():
5295
        if hv_name in cluster.enabled_hypervisors:
5296
          os_hvp[os_name][hv_name] = hv_params
5297

    
5298
    # Convert ip_family to ip_version
5299
    primary_ip_version = constants.IP4_VERSION
5300
    if cluster.primary_ip_family == netutils.IP6Address.family:
5301
      primary_ip_version = constants.IP6_VERSION
5302

    
5303
    result = {
5304
      "software_version": constants.RELEASE_VERSION,
5305
      "protocol_version": constants.PROTOCOL_VERSION,
5306
      "config_version": constants.CONFIG_VERSION,
5307
      "os_api_version": max(constants.OS_API_VERSIONS),
5308
      "export_version": constants.EXPORT_VERSION,
5309
      "architecture": (platform.architecture()[0], platform.machine()),
5310
      "name": cluster.cluster_name,
5311
      "master": cluster.master_node,
5312
      "default_hypervisor": cluster.enabled_hypervisors[0],
5313
      "enabled_hypervisors": cluster.enabled_hypervisors,
5314
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5315
                        for hypervisor_name in cluster.enabled_hypervisors]),
5316
      "os_hvp": os_hvp,
5317
      "beparams": cluster.beparams,
5318
      "osparams": cluster.osparams,
5319
      "nicparams": cluster.nicparams,
5320
      "ndparams": cluster.ndparams,
5321
      "candidate_pool_size": cluster.candidate_pool_size,
5322
      "master_netdev": cluster.master_netdev,
5323
      "volume_group_name": cluster.volume_group_name,
5324
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5325
      "file_storage_dir": cluster.file_storage_dir,
5326
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5327
      "maintain_node_health": cluster.maintain_node_health,
5328
      "ctime": cluster.ctime,
5329
      "mtime": cluster.mtime,
5330
      "uuid": cluster.uuid,
5331
      "tags": list(cluster.GetTags()),
5332
      "uid_pool": cluster.uid_pool,
5333
      "default_iallocator": cluster.default_iallocator,
5334
      "reserved_lvs": cluster.reserved_lvs,
5335
      "primary_ip_version": primary_ip_version,
5336
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5337
      "hidden_os": cluster.hidden_os,
5338
      "blacklisted_os": cluster.blacklisted_os,
5339
      }
5340

    
5341
    return result
5342

    
5343

    
5344
class LUClusterConfigQuery(NoHooksLU):
5345
  """Return configuration values.
5346

5347
  """
5348
  REQ_BGL = False
5349
  _FIELDS_DYNAMIC = utils.FieldSet()
5350
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5351
                                  "watcher_pause", "volume_group_name")
5352

    
5353
  def CheckArguments(self):
5354
    _CheckOutputFields(static=self._FIELDS_STATIC,
5355
                       dynamic=self._FIELDS_DYNAMIC,
5356
                       selected=self.op.output_fields)
5357

    
5358
  def ExpandNames(self):
5359
    self.needed_locks = {}
5360

    
5361
  def Exec(self, feedback_fn):
5362
    """Dump a representation of the cluster config to the standard output.
5363

5364
    """
5365
    values = []
5366
    for field in self.op.output_fields:
5367
      if field == "cluster_name":
5368
        entry = self.cfg.GetClusterName()
5369
      elif field == "master_node":
5370
        entry = self.cfg.GetMasterNode()
5371
      elif field == "drain_flag":
5372
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5373
      elif field == "watcher_pause":
5374
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5375
      elif field == "volume_group_name":
5376
        entry = self.cfg.GetVGName()
5377
      else:
5378
        raise errors.ParameterError(field)
5379
      values.append(entry)
5380
    return values
5381

    
5382

    
5383
class LUInstanceActivateDisks(NoHooksLU):
5384
  """Bring up an instance's disks.
5385

5386
  """
5387
  REQ_BGL = False
5388

    
5389
  def ExpandNames(self):
5390
    self._ExpandAndLockInstance()
5391
    self.needed_locks[locking.LEVEL_NODE] = []
5392
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5393

    
5394
  def DeclareLocks(self, level):
5395
    if level == locking.LEVEL_NODE:
5396
      self._LockInstancesNodes()
5397

    
5398
  def CheckPrereq(self):
5399
    """Check prerequisites.
5400

5401
    This checks that the instance is in the cluster.
5402

5403
    """
5404
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5405
    assert self.instance is not None, \
5406
      "Cannot retrieve locked instance %s" % self.op.instance_name
5407
    _CheckNodeOnline(self, self.instance.primary_node)
5408

    
5409
  def Exec(self, feedback_fn):
5410
    """Activate the disks.
5411

5412
    """
5413
    disks_ok, disks_info = \
5414
              _AssembleInstanceDisks(self, self.instance,
5415
                                     ignore_size=self.op.ignore_size)
5416
    if not disks_ok:
5417
      raise errors.OpExecError("Cannot activate block devices")
5418

    
5419
    return disks_info
5420

    
5421

    
5422
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5423
                           ignore_size=False):
5424
  """Prepare the block devices for an instance.
5425

5426
  This sets up the block devices on all nodes.
5427

5428
  @type lu: L{LogicalUnit}
5429
  @param lu: the logical unit on whose behalf we execute
5430
  @type instance: L{objects.Instance}
5431
  @param instance: the instance for whose disks we assemble
5432
  @type disks: list of L{objects.Disk} or None
5433
  @param disks: which disks to assemble (or all, if None)
5434
  @type ignore_secondaries: boolean
5435
  @param ignore_secondaries: if true, errors on secondary nodes
5436
      won't result in an error return from the function
5437
  @type ignore_size: boolean
5438
  @param ignore_size: if true, the current known size of the disk
5439
      will not be used during the disk activation, useful for cases
5440
      when the size is wrong
5441
  @return: False if the operation failed, otherwise a list of
5442
      (host, instance_visible_name, node_visible_name)
5443
      with the mapping from node devices to instance devices
5444

5445
  """
5446
  device_info = []
5447
  disks_ok = True
5448
  iname = instance.name
5449
  disks = _ExpandCheckDisks(instance, disks)
5450

    
5451
  # With the two-pass mechanism we try to reduce the window of
5452
  # opportunity for the race condition of switching DRBD to primary
5453
  # before handshaking occurred, but we do not eliminate it
5454

    
5455
  # The proper fix would be to wait (with some limits) until the
5456
  # connection has been made and drbd transitions from WFConnection
5457
  # into any other network-connected state (Connected, SyncTarget,
5458
  # SyncSource, etc.)
5459

    
5460
  # 1st pass, assemble on all nodes in secondary mode
5461
  for idx, inst_disk in enumerate(disks):
5462
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5463
      if ignore_size:
5464
        node_disk = node_disk.Copy()
5465
        node_disk.UnsetSize()
5466
      lu.cfg.SetDiskID(node_disk, node)
5467
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5468
      msg = result.fail_msg
5469
      if msg:
5470
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5471
                           " (is_primary=False, pass=1): %s",
5472
                           inst_disk.iv_name, node, msg)
5473
        if not ignore_secondaries:
5474
          disks_ok = False
5475

    
5476
  # FIXME: race condition on drbd migration to primary
5477

    
5478
  # 2nd pass, do only the primary node
5479
  for idx, inst_disk in enumerate(disks):
5480
    dev_path = None
5481

    
5482
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5483
      if node != instance.primary_node:
5484
        continue
5485
      if ignore_size:
5486
        node_disk = node_disk.Copy()
5487
        node_disk.UnsetSize()
5488
      lu.cfg.SetDiskID(node_disk, node)
5489
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5490
      msg = result.fail_msg
5491
      if msg:
5492
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5493
                           " (is_primary=True, pass=2): %s",
5494
                           inst_disk.iv_name, node, msg)
5495
        disks_ok = False
5496
      else:
5497
        dev_path = result.payload
5498

    
5499
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5500

    
5501
  # leave the disks configured for the primary node
5502
  # this is a workaround that would be fixed better by
5503
  # improving the logical/physical id handling
5504
  for disk in disks:
5505
    lu.cfg.SetDiskID(disk, instance.primary_node)
5506

    
5507
  return disks_ok, device_info
5508

    
5509

    
5510
def _StartInstanceDisks(lu, instance, force):
5511
  """Start the disks of an instance.
5512

5513
  """
5514
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5515
                                           ignore_secondaries=force)
5516
  if not disks_ok:
5517
    _ShutdownInstanceDisks(lu, instance)
5518
    if force is not None and not force:
5519
      lu.proc.LogWarning("", hint="If the message above refers to a"
5520
                         " secondary node,"
5521
                         " you can retry the operation using '--force'.")
5522
    raise errors.OpExecError("Disk consistency error")
5523

    
5524

    
5525
class LUInstanceDeactivateDisks(NoHooksLU):
5526
  """Shutdown an instance's disks.
5527

5528
  """
5529
  REQ_BGL = False
5530

    
5531
  def ExpandNames(self):
5532
    self._ExpandAndLockInstance()
5533
    self.needed_locks[locking.LEVEL_NODE] = []
5534
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5535

    
5536
  def DeclareLocks(self, level):
5537
    if level == locking.LEVEL_NODE:
5538
      self._LockInstancesNodes()
5539

    
5540
  def CheckPrereq(self):
5541
    """Check prerequisites.
5542

5543
    This checks that the instance is in the cluster.
5544

5545
    """
5546
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5547
    assert self.instance is not None, \
5548
      "Cannot retrieve locked instance %s" % self.op.instance_name
5549

    
5550
  def Exec(self, feedback_fn):
5551
    """Deactivate the disks
5552

5553
    """
5554
    instance = self.instance
5555
    if self.op.force:
5556
      _ShutdownInstanceDisks(self, instance)
5557
    else:
5558
      _SafeShutdownInstanceDisks(self, instance)
5559

    
5560

    
5561
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5562
  """Shutdown block devices of an instance.
5563

5564
  This function checks if an instance is running, before calling
5565
  _ShutdownInstanceDisks.
5566

5567
  """
5568
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5569
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5570

    
5571

    
5572
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.
5574

5575
  @type disks: list of L{objects.Disk} or None
5576
  @param disks: selected disks
5577
  @rtype: list of L{objects.Disk}
5578
  @return: selected instance disks to act on
5579

5580
  """
5581
  if disks is None:
5582
    return instance.disks
5583
  else:
5584
    if not set(disks).issubset(instance.disks):
5585
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5586
                                   " target instance")
5587
    return disks
5588

    
5589

    
5590
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5591
  """Shutdown block devices of an instance.
5592

5593
  This does the shutdown on all nodes of the instance.
5594

5595
  If the ignore_primary is false, errors on the primary node are
5596
  ignored.
5597

5598
  """
5599
  all_result = True
5600
  disks = _ExpandCheckDisks(instance, disks)
5601

    
5602
  for disk in disks:
5603
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5604
      lu.cfg.SetDiskID(top_disk, node)
5605
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5606
      msg = result.fail_msg
5607
      if msg:
5608
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5609
                      disk.iv_name, node, msg)
5610
        if ((node == instance.primary_node and not ignore_primary) or
5611
            (node != instance.primary_node and not result.offline)):
5612
          all_result = False
5613
  return all_result
5614

    
5615

    
5616
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5617
  """Checks if a node has enough free memory.
5618

5619
  This function check if a given node has the needed amount of free
5620
  memory. In case the node has less memory or we cannot get the
5621
  information from the node, this function raise an OpPrereqError
5622
  exception.
5623

5624
  @type lu: C{LogicalUnit}
5625
  @param lu: a logical unit from which we get configuration data
5626
  @type node: C{str}
5627
  @param node: the node to check
5628
  @type reason: C{str}
5629
  @param reason: string to use in the error message
5630
  @type requested: C{int}
5631
  @param requested: the amount of memory in MiB to check for
5632
  @type hypervisor_name: C{str}
5633
  @param hypervisor_name: the hypervisor to ask for memory stats
5634
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5635
      we cannot check the node
5636

5637
  """
5638
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5639
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5640
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5641
  free_mem = nodeinfo[node].payload.get("memory_free", None)
5642
  if not isinstance(free_mem, int):
5643
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5644
                               " was '%s'" % (node, free_mem),
5645
                               errors.ECODE_ENVIRON)
5646
  if requested > free_mem:
5647
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5648
                               " needed %s MiB, available %s MiB" %
5649
                               (node, reason, requested, free_mem),
5650
                               errors.ECODE_NORES)
5651

    
5652

    
5653
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5654
  """Checks if nodes have enough free disk space in the all VGs.
5655

5656
  This function check if all given nodes have the needed amount of
5657
  free disk. In case any node has less disk or we cannot get the
5658
  information from the node, this function raise an OpPrereqError
5659
  exception.
5660

5661
  @type lu: C{LogicalUnit}
5662
  @param lu: a logical unit from which we get configuration data
5663
  @type nodenames: C{list}
5664
  @param nodenames: the list of node names to check
5665
  @type req_sizes: C{dict}
5666
  @param req_sizes: the hash of vg and corresponding amount of disk in
5667
      MiB to check for
5668
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5669
      or we cannot check the node
5670

5671
  """
5672
  for vg, req_size in req_sizes.items():
5673
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5674

    
5675

    
5676
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5677
  """Checks if nodes have enough free disk space in the specified VG.
5678

5679
  This function check if all given nodes have the needed amount of
5680
  free disk. In case any node has less disk or we cannot get the
5681
  information from the node, this function raise an OpPrereqError
5682
  exception.
5683

5684
  @type lu: C{LogicalUnit}
5685
  @param lu: a logical unit from which we get configuration data
5686
  @type nodenames: C{list}
5687
  @param nodenames: the list of node names to check
5688
  @type vg: C{str}
5689
  @param vg: the volume group to check
5690
  @type requested: C{int}
5691
  @param requested: the amount of disk in MiB to check for
5692
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5693
      or we cannot check the node
5694

5695
  """
5696
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5697
  for node in nodenames:
5698
    info = nodeinfo[node]
5699
    info.Raise("Cannot get current information from node %s" % node,
5700
               prereq=True, ecode=errors.ECODE_ENVIRON)
5701
    vg_free = info.payload.get("vg_free", None)
5702
    if not isinstance(vg_free, int):
5703
      raise errors.OpPrereqError("Can't compute free disk space on node"
5704
                                 " %s for vg %s, result was '%s'" %
5705
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
5706
    if requested > vg_free:
5707
      raise errors.OpPrereqError("Not enough disk space on target node %s"
5708
                                 " vg %s: required %d MiB, available %d MiB" %
5709
                                 (node, vg, requested, vg_free),
5710
                                 errors.ECODE_NORES)
5711

    
5712

    
5713
class LUInstanceStartup(LogicalUnit):
5714
  """Starts an instance.
5715

5716
  """
5717
  HPATH = "instance-start"
5718
  HTYPE = constants.HTYPE_INSTANCE
5719
  REQ_BGL = False
5720

    
5721
  def CheckArguments(self):
5722
    # extra beparams
5723
    if self.op.beparams:
5724
      # fill the beparams dict
5725
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5726

    
5727
  def ExpandNames(self):
5728
    self._ExpandAndLockInstance()
5729

    
5730
  def BuildHooksEnv(self):
5731
    """Build hooks env.
5732

5733
    This runs on master, primary and secondary nodes of the instance.
5734

5735
    """
5736
    env = {
5737
      "FORCE": self.op.force,
5738
      }
5739

    
5740
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5741

    
5742
    return env
5743

    
5744
  def BuildHooksNodes(self):
5745
    """Build hooks nodes.
5746

5747
    """
5748
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5749
    return (nl, nl)
5750

    
5751
  def CheckPrereq(self):
5752
    """Check prerequisites.
5753

5754
    This checks that the instance is in the cluster.
5755

5756
    """
5757
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5758
    assert self.instance is not None, \
5759
      "Cannot retrieve locked instance %s" % self.op.instance_name
5760

    
5761
    # extra hvparams
5762
    if self.op.hvparams:
5763
      # check hypervisor parameter syntax (locally)
5764
      cluster = self.cfg.GetClusterInfo()
5765
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5766
      filled_hvp = cluster.FillHV(instance)
5767
      filled_hvp.update(self.op.hvparams)
5768
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5769
      hv_type.CheckParameterSyntax(filled_hvp)
5770
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5771

    
5772
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5773

    
5774
    if self.primary_offline and self.op.ignore_offline_nodes:
5775
      self.proc.LogWarning("Ignoring offline primary node")
5776

    
5777
      if self.op.hvparams or self.op.beparams:
5778
        self.proc.LogWarning("Overridden parameters are ignored")
5779
    else:
5780
      _CheckNodeOnline(self, instance.primary_node)
5781

    
5782
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5783

    
5784
      # check bridges existence
5785
      _CheckInstanceBridgesExist(self, instance)
5786

    
5787
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5788
                                                instance.name,
5789
                                                instance.hypervisor)
5790
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5791
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5792
      if not remote_info.payload: # not running already
5793
        _CheckNodeFreeMemory(self, instance.primary_node,
5794
                             "starting instance %s" % instance.name,
5795
                             bep[constants.BE_MEMORY], instance.hypervisor)
5796

    
5797
  def Exec(self, feedback_fn):
5798
    """Start the instance.
5799

5800
    """
5801
    instance = self.instance
5802
    force = self.op.force
5803

    
5804
    if not self.op.no_remember:
5805
      self.cfg.MarkInstanceUp(instance.name)
5806

    
5807
    if self.primary_offline:
5808
      assert self.op.ignore_offline_nodes
5809
      self.proc.LogInfo("Primary node offline, marked instance as started")
5810
    else:
5811
      node_current = instance.primary_node
5812

    
5813
      _StartInstanceDisks(self, instance, force)
5814

    
5815
      result = self.rpc.call_instance_start(node_current, instance,
5816
                                            self.op.hvparams, self.op.beparams,
5817
                                            self.op.startup_paused)
5818
      msg = result.fail_msg
5819
      if msg:
5820
        _ShutdownInstanceDisks(self, instance)
5821
        raise errors.OpExecError("Could not start instance: %s" % msg)
5822

    
5823

    
5824
class LUInstanceReboot(LogicalUnit):
5825
  """Reboot an instance.
5826

5827
  """
5828
  HPATH = "instance-reboot"
5829
  HTYPE = constants.HTYPE_INSTANCE
5830
  REQ_BGL = False
5831

    
5832
  def ExpandNames(self):
5833
    self._ExpandAndLockInstance()
5834

    
5835
  def BuildHooksEnv(self):
5836
    """Build hooks env.
5837

5838
    This runs on master, primary and secondary nodes of the instance.
5839

5840
    """
5841
    env = {
5842
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5843
      "REBOOT_TYPE": self.op.reboot_type,
5844
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5845
      }
5846

    
5847
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5848

    
5849
    return env
5850

    
5851
  def BuildHooksNodes(self):
5852
    """Build hooks nodes.
5853

5854
    """
5855
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5856
    return (nl, nl)
5857

    
5858
  def CheckPrereq(self):
5859
    """Check prerequisites.
5860

5861
    This checks that the instance is in the cluster.
5862

5863
    """
5864
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5865
    assert self.instance is not None, \
5866
      "Cannot retrieve locked instance %s" % self.op.instance_name
5867

    
5868
    _CheckNodeOnline(self, instance.primary_node)
5869

    
5870
    # check bridges existence
5871
    _CheckInstanceBridgesExist(self, instance)
5872

    
5873
  def Exec(self, feedback_fn):
5874
    """Reboot the instance.
5875

5876
    """
5877
    instance = self.instance
5878
    ignore_secondaries = self.op.ignore_secondaries
5879
    reboot_type = self.op.reboot_type
5880

    
5881
    remote_info = self.rpc.call_instance_info(instance.primary_node,
5882
                                              instance.name,
5883
                                              instance.hypervisor)
5884
    remote_info.Raise("Error checking node %s" % instance.primary_node)
5885
    instance_running = bool(remote_info.payload)
5886

    
5887
    node_current = instance.primary_node
5888

    
5889
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5890
                                            constants.INSTANCE_REBOOT_HARD]:
5891
      for disk in instance.disks:
5892
        self.cfg.SetDiskID(disk, node_current)
5893
      result = self.rpc.call_instance_reboot(node_current, instance,
5894
                                             reboot_type,
5895
                                             self.op.shutdown_timeout)
5896
      result.Raise("Could not reboot instance")
5897
    else:
5898
      if instance_running:
5899
        result = self.rpc.call_instance_shutdown(node_current, instance,
5900
                                                 self.op.shutdown_timeout)
5901
        result.Raise("Could not shutdown instance for full reboot")
5902
        _ShutdownInstanceDisks(self, instance)
5903
      else:
5904
        self.LogInfo("Instance %s was already stopped, starting now",
5905
                     instance.name)
5906
      _StartInstanceDisks(self, instance, ignore_secondaries)
5907
      result = self.rpc.call_instance_start(node_current, instance,
5908
                                            None, None, False)
5909
      msg = result.fail_msg
5910
      if msg:
5911
        _ShutdownInstanceDisks(self, instance)
5912
        raise errors.OpExecError("Could not start instance for"
5913
                                 " full reboot: %s" % msg)
5914

    
5915
    self.cfg.MarkInstanceUp(instance.name)
5916

    
5917

    
5918
class LUInstanceShutdown(LogicalUnit):
5919
  """Shutdown an instance.
5920

5921
  """
5922
  HPATH = "instance-stop"
5923
  HTYPE = constants.HTYPE_INSTANCE
5924
  REQ_BGL = False
5925

    
5926
  def ExpandNames(self):
5927
    self._ExpandAndLockInstance()
5928

    
5929
  def BuildHooksEnv(self):
5930
    """Build hooks env.
5931

5932
    This runs on master, primary and secondary nodes of the instance.
5933

5934
    """
5935
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5936
    env["TIMEOUT"] = self.op.timeout
5937
    return env
5938

    
5939
  def BuildHooksNodes(self):
5940
    """Build hooks nodes.
5941

5942
    """
5943
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5944
    return (nl, nl)
5945

    
5946
  def CheckPrereq(self):
5947
    """Check prerequisites.
5948

5949
    This checks that the instance is in the cluster.
5950

5951
    """
5952
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5953
    assert self.instance is not None, \
5954
      "Cannot retrieve locked instance %s" % self.op.instance_name
5955

    
5956
    self.primary_offline = \
5957
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
5958

    
5959
    if self.primary_offline and self.op.ignore_offline_nodes:
5960
      self.proc.LogWarning("Ignoring offline primary node")
5961
    else:
5962
      _CheckNodeOnline(self, self.instance.primary_node)
5963

    
5964
  def Exec(self, feedback_fn):
5965
    """Shutdown the instance.
5966

5967
    """
5968
    instance = self.instance
5969
    node_current = instance.primary_node
5970
    timeout = self.op.timeout
5971

    
5972
    if not self.op.no_remember:
5973
      self.cfg.MarkInstanceDown(instance.name)
5974

    
5975
    if self.primary_offline:
5976
      assert self.op.ignore_offline_nodes
5977
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
5978
    else:
5979
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5980
      msg = result.fail_msg
5981
      if msg:
5982
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5983

    
5984
      _ShutdownInstanceDisks(self, instance)
5985

    
5986

    
5987
class LUInstanceReinstall(LogicalUnit):
5988
  """Reinstall an instance.
5989

5990
  """
5991
  HPATH = "instance-reinstall"
5992
  HTYPE = constants.HTYPE_INSTANCE
5993
  REQ_BGL = False
5994

    
5995
  def ExpandNames(self):
5996
    self._ExpandAndLockInstance()
5997

    
5998
  def BuildHooksEnv(self):
5999
    """Build hooks env.
6000

6001
    This runs on master, primary and secondary nodes of the instance.
6002

6003
    """
6004
    return _BuildInstanceHookEnvByObject(self, self.instance)
6005

    
6006
  def BuildHooksNodes(self):
6007
    """Build hooks nodes.
6008

6009
    """
6010
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6011
    return (nl, nl)
6012

    
6013
  def CheckPrereq(self):
6014
    """Check prerequisites.
6015

6016
    This checks that the instance is in the cluster and is not running.
6017

6018
    """
6019
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6020
    assert instance is not None, \
6021
      "Cannot retrieve locked instance %s" % self.op.instance_name
6022
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6023
                     " offline, cannot reinstall")
6024
    for node in instance.secondary_nodes:
6025
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6026
                       " cannot reinstall")
6027

    
6028
    if instance.disk_template == constants.DT_DISKLESS:
6029
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6030
                                 self.op.instance_name,
6031
                                 errors.ECODE_INVAL)
6032
    _CheckInstanceDown(self, instance, "cannot reinstall")
6033

    
6034
    if self.op.os_type is not None:
6035
      # OS verification
6036
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6037
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6038
      instance_os = self.op.os_type
6039
    else:
6040
      instance_os = instance.os
6041

    
6042
    nodelist = list(instance.all_nodes)
6043

    
6044
    if self.op.osparams:
6045
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6046
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6047
      self.os_inst = i_osdict # the new dict (without defaults)
6048
    else:
6049
      self.os_inst = None
6050

    
6051
    self.instance = instance
6052

    
6053
  def Exec(self, feedback_fn):
6054
    """Reinstall the instance.
6055

6056
    """
6057
    inst = self.instance
6058

    
6059
    if self.op.os_type is not None:
6060
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6061
      inst.os = self.op.os_type
6062
      # Write to configuration
6063
      self.cfg.Update(inst, feedback_fn)
6064

    
6065
    _StartInstanceDisks(self, inst, None)
6066
    try:
6067
      feedback_fn("Running the instance OS create scripts...")
6068
      # FIXME: pass debug option from opcode to backend
6069
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6070
                                             self.op.debug_level,
6071
                                             osparams=self.os_inst)
6072
      result.Raise("Could not install OS for instance %s on node %s" %
6073
                   (inst.name, inst.primary_node))
6074
    finally:
6075
      _ShutdownInstanceDisks(self, inst)
6076

    
6077

    
6078
class LUInstanceRecreateDisks(LogicalUnit):
6079
  """Recreate an instance's missing disks.
6080

6081
  """
6082
  HPATH = "instance-recreate-disks"
6083
  HTYPE = constants.HTYPE_INSTANCE
6084
  REQ_BGL = False
6085

    
6086
  def CheckArguments(self):
6087
    # normalise the disk list
6088
    self.op.disks = sorted(frozenset(self.op.disks))
6089

    
6090
  def ExpandNames(self):
6091
    self._ExpandAndLockInstance()
6092
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6093
    if self.op.nodes:
6094
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6095
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6096
    else:
6097
      self.needed_locks[locking.LEVEL_NODE] = []
6098

    
6099
  def DeclareLocks(self, level):
6100
    if level == locking.LEVEL_NODE:
6101
      # if we replace the nodes, we only need to lock the old primary,
6102
      # otherwise we need to lock all nodes for disk re-creation
6103
      primary_only = bool(self.op.nodes)
6104
      self._LockInstancesNodes(primary_only=primary_only)
6105

    
6106
  def BuildHooksEnv(self):
6107
    """Build hooks env.
6108

6109
    This runs on master, primary and secondary nodes of the instance.
6110

6111
    """
6112
    return _BuildInstanceHookEnvByObject(self, self.instance)
6113

    
6114
  def BuildHooksNodes(self):
6115
    """Build hooks nodes.
6116

6117
    """
6118
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6119
    return (nl, nl)
6120

    
6121
  def CheckPrereq(self):
6122
    """Check prerequisites.
6123

6124
    This checks that the instance is in the cluster and is not running.
6125

6126
    """
6127
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6128
    assert instance is not None, \
6129
      "Cannot retrieve locked instance %s" % self.op.instance_name
6130
    if self.op.nodes:
6131
      if len(self.op.nodes) != len(instance.all_nodes):
6132
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6133
                                   " %d replacement nodes were specified" %
6134
                                   (instance.name, len(instance.all_nodes),
6135
                                    len(self.op.nodes)),
6136
                                   errors.ECODE_INVAL)
6137
      assert instance.disk_template != constants.DT_DRBD8 or \
6138
          len(self.op.nodes) == 2
6139
      assert instance.disk_template != constants.DT_PLAIN or \
6140
          len(self.op.nodes) == 1
6141
      primary_node = self.op.nodes[0]
6142
    else:
6143
      primary_node = instance.primary_node
6144
    _CheckNodeOnline(self, primary_node)
6145

    
6146
    if instance.disk_template == constants.DT_DISKLESS:
6147
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6148
                                 self.op.instance_name, errors.ECODE_INVAL)
6149
    # if we replace nodes *and* the old primary is offline, we don't
6150
    # check
6151
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6152
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6153
    if not (self.op.nodes and old_pnode.offline):
6154
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6155

    
6156
    if not self.op.disks:
6157
      self.op.disks = range(len(instance.disks))
6158
    else:
6159
      for idx in self.op.disks:
6160
        if idx >= len(instance.disks):
6161
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6162
                                     errors.ECODE_INVAL)
6163
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6164
      raise errors.OpPrereqError("Can't recreate disks partially and"
6165
                                 " change the nodes at the same time",
6166
                                 errors.ECODE_INVAL)
6167
    self.instance = instance
6168

    
6169
  def Exec(self, feedback_fn):
6170
    """Recreate the disks.
6171

6172
    """
6173
    instance = self.instance
6174

    
6175
    to_skip = []
6176
    mods = [] # keeps track of needed logical_id changes
6177

    
6178
    for idx, disk in enumerate(instance.disks):
6179
      if idx not in self.op.disks: # disk idx has not been passed in
6180
        to_skip.append(idx)
6181
        continue
6182
      # update secondaries for disks, if needed
6183
      if self.op.nodes:
6184
        if disk.dev_type == constants.LD_DRBD8:
6185
          # need to update the nodes and minors
6186
          assert len(self.op.nodes) == 2
6187
          assert len(disk.logical_id) == 6 # otherwise disk internals
6188
                                           # have changed
6189
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6190
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6191
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6192
                    new_minors[0], new_minors[1], old_secret)
6193
          assert len(disk.logical_id) == len(new_id)
6194
          mods.append((idx, new_id))
6195

    
6196
    # now that we have passed all asserts above, we can apply the mods
6197
    # in a single run (to avoid partial changes)
6198
    for idx, new_id in mods:
6199
      instance.disks[idx].logical_id = new_id
6200

    
6201
    # change primary node, if needed
6202
    if self.op.nodes:
6203
      instance.primary_node = self.op.nodes[0]
6204
      self.LogWarning("Changing the instance's nodes, you will have to"
6205
                      " remove any disks left on the older nodes manually")
6206

    
6207
    if self.op.nodes:
6208
      self.cfg.Update(instance, feedback_fn)
6209

    
6210
    _CreateDisks(self, instance, to_skip=to_skip)
6211

    
6212

    
6213
class LUInstanceRename(LogicalUnit):
6214
  """Rename an instance.
6215

6216
  """
6217
  HPATH = "instance-rename"
6218
  HTYPE = constants.HTYPE_INSTANCE
6219

    
6220
  def CheckArguments(self):
6221
    """Check arguments.
6222

6223
    """
6224
    if self.op.ip_check and not self.op.name_check:
6225
      # TODO: make the ip check more flexible and not depend on the name check
6226
      raise errors.OpPrereqError("IP address check requires a name check",
6227
                                 errors.ECODE_INVAL)
6228

    
6229
  def BuildHooksEnv(self):
6230
    """Build hooks env.
6231

6232
    This runs on master, primary and secondary nodes of the instance.
6233

6234
    """
6235
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6236
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6237
    return env
6238

    
6239
  def BuildHooksNodes(self):
6240
    """Build hooks nodes.
6241

6242
    """
6243
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6244
    return (nl, nl)
6245

    
6246
  def CheckPrereq(self):
6247
    """Check prerequisites.
6248

6249
    This checks that the instance is in the cluster and is not running.
6250

6251
    """
6252
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6253
                                                self.op.instance_name)
6254
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6255
    assert instance is not None
6256
    _CheckNodeOnline(self, instance.primary_node)
6257
    _CheckInstanceDown(self, instance, "cannot rename")
6258
    self.instance = instance
6259

    
6260
    new_name = self.op.new_name
6261
    if self.op.name_check:
6262
      hostname = netutils.GetHostname(name=new_name)
6263
      if hostname != new_name:
6264
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6265
                     hostname.name)
6266
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6267
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6268
                                    " same as given hostname '%s'") %
6269
                                    (hostname.name, self.op.new_name),
6270
                                    errors.ECODE_INVAL)
6271
      new_name = self.op.new_name = hostname.name
6272
      if (self.op.ip_check and
6273
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6274
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6275
                                   (hostname.ip, new_name),
6276
                                   errors.ECODE_NOTUNIQUE)
6277

    
6278
    instance_list = self.cfg.GetInstanceList()
6279
    if new_name in instance_list and new_name != instance.name:
6280
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6281
                                 new_name, errors.ECODE_EXISTS)
6282

    
6283
  def Exec(self, feedback_fn):
6284
    """Rename the instance.
6285

6286
    """
6287
    inst = self.instance
6288
    old_name = inst.name
6289

    
6290
    rename_file_storage = False
6291
    if (inst.disk_template in constants.DTS_FILEBASED and
6292
        self.op.new_name != inst.name):
6293
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6294
      rename_file_storage = True
6295

    
6296
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6297
    # Change the instance lock. This is definitely safe while we hold the BGL.
6298
    # Otherwise the new lock would have to be added in acquired mode.
6299
    assert self.REQ_BGL
6300
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6301
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6302

    
6303
    # re-read the instance from the configuration after rename
6304
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6305

    
6306
    if rename_file_storage:
6307
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6308
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6309
                                                     old_file_storage_dir,
6310
                                                     new_file_storage_dir)
6311
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6312
                   " (but the instance has been renamed in Ganeti)" %
6313
                   (inst.primary_node, old_file_storage_dir,
6314
                    new_file_storage_dir))
6315

    
6316
    _StartInstanceDisks(self, inst, None)
6317
    try:
6318
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6319
                                                 old_name, self.op.debug_level)
6320
      msg = result.fail_msg
6321
      if msg:
6322
        msg = ("Could not run OS rename script for instance %s on node %s"
6323
               " (but the instance has been renamed in Ganeti): %s" %
6324
               (inst.name, inst.primary_node, msg))
6325
        self.proc.LogWarning(msg)
6326
    finally:
6327
      _ShutdownInstanceDisks(self, inst)
6328

    
6329
    return inst.name
6330

    
6331

    
6332
class LUInstanceRemove(LogicalUnit):
6333
  """Remove an instance.
6334

6335
  """
6336
  HPATH = "instance-remove"
6337
  HTYPE = constants.HTYPE_INSTANCE
6338
  REQ_BGL = False
6339

    
6340
  def ExpandNames(self):
6341
    self._ExpandAndLockInstance()
6342
    self.needed_locks[locking.LEVEL_NODE] = []
6343
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6344

    
6345
  def DeclareLocks(self, level):
6346
    if level == locking.LEVEL_NODE:
6347
      self._LockInstancesNodes()
6348

    
6349
  def BuildHooksEnv(self):
6350
    """Build hooks env.
6351

6352
    This runs on master, primary and secondary nodes of the instance.
6353

6354
    """
6355
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6356
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6357
    return env
6358

    
6359
  def BuildHooksNodes(self):
6360
    """Build hooks nodes.
6361

6362
    """
6363
    nl = [self.cfg.GetMasterNode()]
6364
    nl_post = list(self.instance.all_nodes) + nl
6365
    return (nl, nl_post)
6366

    
6367
  def CheckPrereq(self):
6368
    """Check prerequisites.
6369

6370
    This checks that the instance is in the cluster.
6371

6372
    """
6373
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6374
    assert self.instance is not None, \
6375
      "Cannot retrieve locked instance %s" % self.op.instance_name
6376

    
6377
  def Exec(self, feedback_fn):
6378
    """Remove the instance.
6379

6380
    """
6381
    instance = self.instance
6382
    logging.info("Shutting down instance %s on node %s",
6383
                 instance.name, instance.primary_node)
6384

    
6385
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6386
                                             self.op.shutdown_timeout)
6387
    msg = result.fail_msg
6388
    if msg:
6389
      if self.op.ignore_failures:
6390
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6391
      else:
6392
        raise errors.OpExecError("Could not shutdown instance %s on"
6393
                                 " node %s: %s" %
6394
                                 (instance.name, instance.primary_node, msg))
6395

    
6396
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6397

    
6398

    
6399
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6400
  """Utility function to remove an instance.
6401

6402
  """
6403
  logging.info("Removing block devices for instance %s", instance.name)
6404

    
6405
  if not _RemoveDisks(lu, instance):
6406
    if not ignore_failures:
6407
      raise errors.OpExecError("Can't remove instance's disks")
6408
    feedback_fn("Warning: can't remove instance's disks")
6409

    
6410
  logging.info("Removing instance %s out of cluster config", instance.name)
6411

    
6412
  lu.cfg.RemoveInstance(instance.name)
6413

    
6414
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6415
    "Instance lock removal conflict"
6416

    
6417
  # Remove lock for the instance
6418
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6419

    
6420

    
6421
class LUInstanceQuery(NoHooksLU):
6422
  """Logical unit for querying instances.
6423

6424
  """
6425
  # pylint: disable-msg=W0142
6426
  REQ_BGL = False
6427

    
6428
  def CheckArguments(self):
6429
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6430
                             self.op.output_fields, self.op.use_locking)
6431

    
6432
  def ExpandNames(self):
6433
    self.iq.ExpandNames(self)
6434

    
6435
  def DeclareLocks(self, level):
6436
    self.iq.DeclareLocks(self, level)
6437

    
6438
  def Exec(self, feedback_fn):
6439
    return self.iq.OldStyleQuery(self)
6440

    
6441

    
6442
class LUInstanceFailover(LogicalUnit):
6443
  """Failover an instance.
6444

6445
  """
6446
  HPATH = "instance-failover"
6447
  HTYPE = constants.HTYPE_INSTANCE
6448
  REQ_BGL = False
6449

    
6450
  def CheckArguments(self):
6451
    """Check the arguments.
6452

6453
    """
6454
    self.iallocator = getattr(self.op, "iallocator", None)
6455
    self.target_node = getattr(self.op, "target_node", None)
6456

    
6457
  def ExpandNames(self):
6458
    self._ExpandAndLockInstance()
6459

    
6460
    if self.op.target_node is not None:
6461
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6462

    
6463
    self.needed_locks[locking.LEVEL_NODE] = []
6464
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6465

    
6466
    ignore_consistency = self.op.ignore_consistency
6467
    shutdown_timeout = self.op.shutdown_timeout
6468
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6469
                                       cleanup=False,
6470
                                       failover=True,
6471
                                       ignore_consistency=ignore_consistency,
6472
                                       shutdown_timeout=shutdown_timeout)
6473
    self.tasklets = [self._migrater]
6474

    
6475
  def DeclareLocks(self, level):
6476
    if level == locking.LEVEL_NODE:
6477
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6478
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6479
        if self.op.target_node is None:
6480
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6481
        else:
6482
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6483
                                                   self.op.target_node]
6484
        del self.recalculate_locks[locking.LEVEL_NODE]
6485
      else:
6486
        self._LockInstancesNodes()
6487

    
6488
  def BuildHooksEnv(self):
6489
    """Build hooks env.
6490

6491
    This runs on master, primary and secondary nodes of the instance.
6492

6493
    """
6494
    instance = self._migrater.instance
6495
    source_node = instance.primary_node
6496
    target_node = self.op.target_node
6497
    env = {
6498
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6499
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6500
      "OLD_PRIMARY": source_node,
6501
      "NEW_PRIMARY": target_node,
6502
      }
6503

    
6504
    if instance.disk_template in constants.DTS_INT_MIRROR:
6505
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6506
      env["NEW_SECONDARY"] = source_node
6507
    else:
6508
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6509

    
6510
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6511

    
6512
    return env
6513

    
6514
  def BuildHooksNodes(self):
6515
    """Build hooks nodes.
6516

6517
    """
6518
    instance = self._migrater.instance
6519
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6520
    return (nl, nl + [instance.primary_node])
6521

    
6522

    
6523
class LUInstanceMigrate(LogicalUnit):
6524
  """Migrate an instance.
6525

6526
  This is migration without shutting down, compared to the failover,
6527
  which is done with shutdown.
6528

6529
  """
6530
  HPATH = "instance-migrate"
6531
  HTYPE = constants.HTYPE_INSTANCE
6532
  REQ_BGL = False
6533

    
6534
  def ExpandNames(self):
6535
    self._ExpandAndLockInstance()
6536

    
6537
    if self.op.target_node is not None:
6538
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6539

    
6540
    self.needed_locks[locking.LEVEL_NODE] = []
6541
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6542

    
6543
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6544
                                       cleanup=self.op.cleanup,
6545
                                       failover=False,
6546
                                       fallback=self.op.allow_failover)
6547
    self.tasklets = [self._migrater]
6548

    
6549
  def DeclareLocks(self, level):
6550
    if level == locking.LEVEL_NODE:
6551
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6552
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6553
        if self.op.target_node is None:
6554
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6555
        else:
6556
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6557
                                                   self.op.target_node]
6558
        del self.recalculate_locks[locking.LEVEL_NODE]
6559
      else:
6560
        self._LockInstancesNodes()
6561

    
6562
  def BuildHooksEnv(self):
6563
    """Build hooks env.
6564

6565
    This runs on master, primary and secondary nodes of the instance.
6566

6567
    """
6568
    instance = self._migrater.instance
6569
    source_node = instance.primary_node
6570
    target_node = self.op.target_node
6571
    env = _BuildInstanceHookEnvByObject(self, instance)
6572
    env.update({
6573
      "MIGRATE_LIVE": self._migrater.live,
6574
      "MIGRATE_CLEANUP": self.op.cleanup,
6575
      "OLD_PRIMARY": source_node,
6576
      "NEW_PRIMARY": target_node,
6577
      })
6578

    
6579
    if instance.disk_template in constants.DTS_INT_MIRROR:
6580
      env["OLD_SECONDARY"] = target_node
6581
      env["NEW_SECONDARY"] = source_node
6582
    else:
6583
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6584

    
6585
    return env
6586

    
6587
  def BuildHooksNodes(self):
6588
    """Build hooks nodes.
6589

6590
    """
6591
    instance = self._migrater.instance
6592
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6593
    return (nl, nl + [instance.primary_node])
6594

    
6595

    
6596
class LUInstanceMove(LogicalUnit):
6597
  """Move an instance by data-copying.
6598

6599
  """
6600
  HPATH = "instance-move"
6601
  HTYPE = constants.HTYPE_INSTANCE
6602
  REQ_BGL = False
6603

    
6604
  def ExpandNames(self):
6605
    self._ExpandAndLockInstance()
6606
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6607
    self.op.target_node = target_node
6608
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6609
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6610

    
6611
  def DeclareLocks(self, level):
6612
    if level == locking.LEVEL_NODE:
6613
      self._LockInstancesNodes(primary_only=True)
6614

    
6615
  def BuildHooksEnv(self):
6616
    """Build hooks env.
6617

6618
    This runs on master, primary and secondary nodes of the instance.
6619

6620
    """
6621
    env = {
6622
      "TARGET_NODE": self.op.target_node,
6623
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6624
      }
6625
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6626
    return env
6627

    
6628
  def BuildHooksNodes(self):
6629
    """Build hooks nodes.
6630

6631
    """
6632
    nl = [
6633
      self.cfg.GetMasterNode(),
6634
      self.instance.primary_node,
6635
      self.op.target_node,
6636
      ]
6637
    return (nl, nl)
6638

    
6639
  def CheckPrereq(self):
6640
    """Check prerequisites.
6641

6642
    This checks that the instance is in the cluster.
6643

6644
    """
6645
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6646
    assert self.instance is not None, \
6647
      "Cannot retrieve locked instance %s" % self.op.instance_name
6648

    
6649
    node = self.cfg.GetNodeInfo(self.op.target_node)
6650
    assert node is not None, \
6651
      "Cannot retrieve locked node %s" % self.op.target_node
6652

    
6653
    self.target_node = target_node = node.name
6654

    
6655
    if target_node == instance.primary_node:
6656
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6657
                                 (instance.name, target_node),
6658
                                 errors.ECODE_STATE)
6659

    
6660
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6661

    
6662
    for idx, dsk in enumerate(instance.disks):
6663
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6664
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6665
                                   " cannot copy" % idx, errors.ECODE_STATE)
6666

    
6667
    _CheckNodeOnline(self, target_node)
6668
    _CheckNodeNotDrained(self, target_node)
6669
    _CheckNodeVmCapable(self, target_node)
6670

    
6671
    if instance.admin_up:
6672
      # check memory requirements on the secondary node
6673
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6674
                           instance.name, bep[constants.BE_MEMORY],
6675
                           instance.hypervisor)
6676
    else:
6677
      self.LogInfo("Not checking memory on the secondary node as"
6678
                   " instance will not be started")
6679

    
6680
    # check bridge existance
6681
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6682

    
6683
  def Exec(self, feedback_fn):
6684
    """Move an instance.
6685

6686
    The move is done by shutting it down on its present node, copying
6687
    the data over (slow) and starting it on the new node.
6688

6689
    """
6690
    instance = self.instance
6691

    
6692
    source_node = instance.primary_node
6693
    target_node = self.target_node
6694

    
6695
    self.LogInfo("Shutting down instance %s on source node %s",
6696
                 instance.name, source_node)
6697

    
6698
    result = self.rpc.call_instance_shutdown(source_node, instance,
6699
                                             self.op.shutdown_timeout)
6700
    msg = result.fail_msg
6701
    if msg:
6702
      if self.op.ignore_consistency:
6703
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6704
                             " Proceeding anyway. Please make sure node"
6705
                             " %s is down. Error details: %s",
6706
                             instance.name, source_node, source_node, msg)
6707
      else:
6708
        raise errors.OpExecError("Could not shutdown instance %s on"
6709
                                 " node %s: %s" %
6710
                                 (instance.name, source_node, msg))
6711

    
6712
    # create the target disks
6713
    try:
6714
      _CreateDisks(self, instance, target_node=target_node)
6715
    except errors.OpExecError:
6716
      self.LogWarning("Device creation failed, reverting...")
6717
      try:
6718
        _RemoveDisks(self, instance, target_node=target_node)
6719
      finally:
6720
        self.cfg.ReleaseDRBDMinors(instance.name)
6721
        raise
6722

    
6723
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6724

    
6725
    errs = []
6726
    # activate, get path, copy the data over
6727
    for idx, disk in enumerate(instance.disks):
6728
      self.LogInfo("Copying data for disk %d", idx)
6729
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6730
                                               instance.name, True, idx)
6731
      if result.fail_msg:
6732
        self.LogWarning("Can't assemble newly created disk %d: %s",
6733
                        idx, result.fail_msg)
6734
        errs.append(result.fail_msg)
6735
        break
6736
      dev_path = result.payload
6737
      result = self.rpc.call_blockdev_export(source_node, disk,
6738
                                             target_node, dev_path,
6739
                                             cluster_name)
6740
      if result.fail_msg:
6741
        self.LogWarning("Can't copy data over for disk %d: %s",
6742
                        idx, result.fail_msg)
6743
        errs.append(result.fail_msg)
6744
        break
6745

    
6746
    if errs:
6747
      self.LogWarning("Some disks failed to copy, aborting")
6748
      try:
6749
        _RemoveDisks(self, instance, target_node=target_node)
6750
      finally:
6751
        self.cfg.ReleaseDRBDMinors(instance.name)
6752
        raise errors.OpExecError("Errors during disk copy: %s" %
6753
                                 (",".join(errs),))
6754

    
6755
    instance.primary_node = target_node
6756
    self.cfg.Update(instance, feedback_fn)
6757

    
6758
    self.LogInfo("Removing the disks on the original node")
6759
    _RemoveDisks(self, instance, target_node=source_node)
6760

    
6761
    # Only start the instance if it's marked as up
6762
    if instance.admin_up:
6763
      self.LogInfo("Starting instance %s on node %s",
6764
                   instance.name, target_node)
6765

    
6766
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6767
                                           ignore_secondaries=True)
6768
      if not disks_ok:
6769
        _ShutdownInstanceDisks(self, instance)
6770
        raise errors.OpExecError("Can't activate the instance's disks")
6771

    
6772
      result = self.rpc.call_instance_start(target_node, instance,
6773
                                            None, None, False)
6774
      msg = result.fail_msg
6775
      if msg:
6776
        _ShutdownInstanceDisks(self, instance)
6777
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6778
                                 (instance.name, target_node, msg))
6779

    
6780

    
6781
class LUNodeMigrate(LogicalUnit):
6782
  """Migrate all instances from a node.
6783

6784
  """
6785
  HPATH = "node-migrate"
6786
  HTYPE = constants.HTYPE_NODE
6787
  REQ_BGL = False
6788

    
6789
  def CheckArguments(self):
6790
    pass
6791

    
6792
  def ExpandNames(self):
6793
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6794

    
6795
    self.share_locks = _ShareAll()
6796
    self.needed_locks = {
6797
      locking.LEVEL_NODE: [self.op.node_name],
6798
      }
6799

    
6800
  def BuildHooksEnv(self):
6801
    """Build hooks env.
6802

6803
    This runs on the master, the primary and all the secondaries.
6804

6805
    """
6806
    return {
6807
      "NODE_NAME": self.op.node_name,
6808
      }
6809

    
6810
  def BuildHooksNodes(self):
6811
    """Build hooks nodes.
6812

6813
    """
6814
    nl = [self.cfg.GetMasterNode()]
6815
    return (nl, nl)
6816

    
6817
  def CheckPrereq(self):
6818
    pass
6819

    
6820
  def Exec(self, feedback_fn):
6821
    # Prepare jobs for migration instances
6822
    jobs = [
6823
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
6824
                                 mode=self.op.mode,
6825
                                 live=self.op.live,
6826
                                 iallocator=self.op.iallocator,
6827
                                 target_node=self.op.target_node)]
6828
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6829
      ]
6830

    
6831
    # TODO: Run iallocator in this opcode and pass correct placement options to
6832
    # OpInstanceMigrate. Since other jobs can modify the cluster between
6833
    # running the iallocator and the actual migration, a good consistency model
6834
    # will have to be found.
6835

    
6836
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
6837
            frozenset([self.op.node_name]))
6838

    
6839
    return ResultWithJobs(jobs)
6840

    
6841

    
6842
class TLMigrateInstance(Tasklet):
6843
  """Tasklet class for instance migration.
6844

6845
  @type live: boolean
6846
  @ivar live: whether the migration will be done live or non-live;
6847
      this variable is initalized only after CheckPrereq has run
6848
  @type cleanup: boolean
6849
  @ivar cleanup: Wheater we cleanup from a failed migration
6850
  @type iallocator: string
6851
  @ivar iallocator: The iallocator used to determine target_node
6852
  @type target_node: string
6853
  @ivar target_node: If given, the target_node to reallocate the instance to
6854
  @type failover: boolean
6855
  @ivar failover: Whether operation results in failover or migration
6856
  @type fallback: boolean
6857
  @ivar fallback: Whether fallback to failover is allowed if migration not
6858
                  possible
6859
  @type ignore_consistency: boolean
6860
  @ivar ignore_consistency: Wheter we should ignore consistency between source
6861
                            and target node
6862
  @type shutdown_timeout: int
6863
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
6864

6865
  """
6866
  def __init__(self, lu, instance_name, cleanup=False,
6867
               failover=False, fallback=False,
6868
               ignore_consistency=False,
6869
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6870
    """Initializes this class.
6871

6872
    """
6873
    Tasklet.__init__(self, lu)
6874

    
6875
    # Parameters
6876
    self.instance_name = instance_name
6877
    self.cleanup = cleanup
6878
    self.live = False # will be overridden later
6879
    self.failover = failover
6880
    self.fallback = fallback
6881
    self.ignore_consistency = ignore_consistency
6882
    self.shutdown_timeout = shutdown_timeout
6883

    
6884
  def CheckPrereq(self):
6885
    """Check prerequisites.
6886

6887
    This checks that the instance is in the cluster.
6888

6889
    """
6890
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6891
    instance = self.cfg.GetInstanceInfo(instance_name)
6892
    assert instance is not None
6893
    self.instance = instance
6894

    
6895
    if (not self.cleanup and not instance.admin_up and not self.failover and
6896
        self.fallback):
6897
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6898
                      " to failover")
6899
      self.failover = True
6900

    
6901
    if instance.disk_template not in constants.DTS_MIRRORED:
6902
      if self.failover:
6903
        text = "failovers"
6904
      else:
6905
        text = "migrations"
6906
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6907
                                 " %s" % (instance.disk_template, text),
6908
                                 errors.ECODE_STATE)
6909

    
6910
    if instance.disk_template in constants.DTS_EXT_MIRROR:
6911
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6912

    
6913
      if self.lu.op.iallocator:
6914
        self._RunAllocator()
6915
      else:
6916
        # We set set self.target_node as it is required by
6917
        # BuildHooksEnv
6918
        self.target_node = self.lu.op.target_node
6919

    
6920
      # self.target_node is already populated, either directly or by the
6921
      # iallocator run
6922
      target_node = self.target_node
6923
      if self.target_node == instance.primary_node:
6924
        raise errors.OpPrereqError("Cannot migrate instance %s"
6925
                                   " to its primary (%s)" %
6926
                                   (instance.name, instance.primary_node))
6927

    
6928
      if len(self.lu.tasklets) == 1:
6929
        # It is safe to release locks only when we're the only tasklet
6930
        # in the LU
6931
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6932
                      keep=[instance.primary_node, self.target_node])
6933

    
6934
    else:
6935
      secondary_nodes = instance.secondary_nodes
6936
      if not secondary_nodes:
6937
        raise errors.ConfigurationError("No secondary node but using"
6938
                                        " %s disk template" %
6939
                                        instance.disk_template)
6940
      target_node = secondary_nodes[0]
6941
      if self.lu.op.iallocator or (self.lu.op.target_node and
6942
                                   self.lu.op.target_node != target_node):
6943
        if self.failover:
6944
          text = "failed over"
6945
        else:
6946
          text = "migrated"
6947
        raise errors.OpPrereqError("Instances with disk template %s cannot"
6948
                                   " be %s to arbitrary nodes"
6949
                                   " (neither an iallocator nor a target"
6950
                                   " node can be passed)" %
6951
                                   (instance.disk_template, text),
6952
                                   errors.ECODE_INVAL)
6953

    
6954
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6955

    
6956
    # check memory requirements on the secondary node
6957
    if not self.failover or instance.admin_up:
6958
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6959
                           instance.name, i_be[constants.BE_MEMORY],
6960
                           instance.hypervisor)
6961
    else:
6962
      self.lu.LogInfo("Not checking memory on the secondary node as"
6963
                      " instance will not be started")
6964

    
6965
    # check bridge existance
6966
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6967

    
6968
    if not self.cleanup:
6969
      _CheckNodeNotDrained(self.lu, target_node)
6970
      if not self.failover:
6971
        result = self.rpc.call_instance_migratable(instance.primary_node,
6972
                                                   instance)
6973
        if result.fail_msg and self.fallback:
6974
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6975
                          " failover")
6976
          self.failover = True
6977
        else:
6978
          result.Raise("Can't migrate, please use failover",
6979
                       prereq=True, ecode=errors.ECODE_STATE)
6980

    
6981
    assert not (self.failover and self.cleanup)
6982

    
6983
    if not self.failover:
6984
      if self.lu.op.live is not None and self.lu.op.mode is not None:
6985
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6986
                                   " parameters are accepted",
6987
                                   errors.ECODE_INVAL)
6988
      if self.lu.op.live is not None:
6989
        if self.lu.op.live:
6990
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
6991
        else:
6992
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6993
        # reset the 'live' parameter to None so that repeated
6994
        # invocations of CheckPrereq do not raise an exception
6995
        self.lu.op.live = None
6996
      elif self.lu.op.mode is None:
6997
        # read the default value from the hypervisor
6998
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6999
                                                skip_globals=False)
7000
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7001

    
7002
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7003
    else:
7004
      # Failover is never live
7005
      self.live = False
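    # To summarize the logic above: an explicit 'live' flag is translated
    # into the equivalent migration mode (live -> HT_MIGRATION_LIVE,
    # otherwise HT_MIGRATION_NONLIVE), an explicit 'mode' is used as given,
    # and if neither was passed the hypervisor's HV_MIGRATION_MODE default
    # decides; failovers always end up with self.live = False.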
7006

    
7007
  def _RunAllocator(self):
7008
    """Run the allocator based on input opcode.
7009

7010
    """
7011
    ial = IAllocator(self.cfg, self.rpc,
7012
                     mode=constants.IALLOCATOR_MODE_RELOC,
7013
                     name=self.instance_name,
7014
                     # TODO See why hail breaks with a single node below
7015
                     relocate_from=[self.instance.primary_node,
7016
                                    self.instance.primary_node],
7017
                     )
7018

    
7019
    ial.Run(self.lu.op.iallocator)
7020

    
7021
    if not ial.success:
7022
      raise errors.OpPrereqError("Can't compute nodes using"
7023
                                 " iallocator '%s': %s" %
7024
                                 (self.lu.op.iallocator, ial.info),
7025
                                 errors.ECODE_NORES)
7026
    if len(ial.result) != ial.required_nodes:
7027
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7028
                                 " of nodes (%s), required %s" %
7029
                                 (self.lu.op.iallocator, len(ial.result),
7030
                                  ial.required_nodes), errors.ECODE_FAULT)
7031
    self.target_node = ial.result[0]
7032
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7033
                    self.instance_name, self.lu.op.iallocator,
7034
                    utils.CommaJoin(ial.result))
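    # In IALLOCATOR_MODE_RELOC the allocator is expected to return a single
    # replacement node, so ial.result[0] becomes the migration target; the
    # required_nodes check above guards against malformed answers.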
7035

    
7036
  def _WaitUntilSync(self):
7037
    """Poll with custom rpc for disk sync.
7038

7039
    This uses our own step-based rpc call.
7040

7041
    """
7042
    self.feedback_fn("* wait until resync is done")
7043
    all_done = False
7044
    while not all_done:
7045
      all_done = True
7046
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7047
                                            self.nodes_ip,
7048
                                            self.instance.disks)
7049
      min_percent = 100
7050
      for node, nres in result.items():
7051
        nres.Raise("Cannot resync disks on node %s" % node)
7052
        node_done, node_percent = nres.payload
7053
        all_done = all_done and node_done
7054
        if node_percent is not None:
7055
          min_percent = min(min_percent, node_percent)
7056
      if not all_done:
7057
        if min_percent < 100:
7058
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7059
        time.sleep(2)
7060

    
7061
  def _EnsureSecondary(self, node):
7062
    """Demote a node to secondary.
7063

7064
    """
7065
    self.feedback_fn("* switching node %s to secondary mode" % node)
7066

    
7067
    for dev in self.instance.disks:
7068
      self.cfg.SetDiskID(dev, node)
7069

    
7070
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7071
                                          self.instance.disks)
7072
    result.Raise("Cannot change disk to secondary on node %s" % node)
7073

    
7074
  def _GoStandalone(self):
7075
    """Disconnect from the network.
7076

7077
    """
7078
    self.feedback_fn("* changing into standalone mode")
7079
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7080
                                               self.instance.disks)
7081
    for node, nres in result.items():
7082
      nres.Raise("Cannot disconnect disks node %s" % node)
7083

    
7084
  def _GoReconnect(self, multimaster):
7085
    """Reconnect to the network.
7086

7087
    """
7088
    if multimaster:
7089
      msg = "dual-master"
7090
    else:
7091
      msg = "single-master"
7092
    self.feedback_fn("* changing disks into %s mode" % msg)
7093
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7094
                                           self.instance.disks,
7095
                                           self.instance.name, multimaster)
7096
    for node, nres in result.items():
7097
      nres.Raise("Cannot change disks config on node %s" % node)
7098

    
7099
  def _ExecCleanup(self):
7100
    """Try to cleanup after a failed migration.
7101

7102
    The cleanup is done by:
7103
      - check that the instance is running only on one node
7104
        (and update the config if needed)
7105
      - change disks on its secondary node to secondary
7106
      - wait until disks are fully synchronized
7107
      - disconnect from the network
7108
      - change disks into single-master mode
7109
      - wait again until disks are fully synchronized
7110

7111
    """
7112
    instance = self.instance
7113
    target_node = self.target_node
7114
    source_node = self.source_node
7115

    
7116
    # check running on only one node
7117
    self.feedback_fn("* checking where the instance actually runs"
7118
                     " (if this hangs, the hypervisor might be in"
7119
                     " a bad state)")
7120
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7121
    for node, result in ins_l.items():
7122
      result.Raise("Can't contact node %s" % node)
7123

    
7124
    runningon_source = instance.name in ins_l[source_node].payload
7125
    runningon_target = instance.name in ins_l[target_node].payload
7126

    
7127
    if runningon_source and runningon_target:
7128
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7129
                               " or the hypervisor is confused; you will have"
7130
                               " to ensure manually that it runs only on one"
7131
                               " and restart this operation")
7132

    
7133
    if not (runningon_source or runningon_target):
7134
      raise errors.OpExecError("Instance does not seem to be running at all;"
7135
                               " in this case it's safer to repair by"
7136
                               " running 'gnt-instance stop' to ensure disk"
7137
                               " shutdown, and then restarting it")
7138

    
7139
    if runningon_target:
7140
      # the migration has actually succeeded, we need to update the config
7141
      self.feedback_fn("* instance running on secondary node (%s),"
7142
                       " updating config" % target_node)
7143
      instance.primary_node = target_node
7144
      self.cfg.Update(instance, self.feedback_fn)
7145
      demoted_node = source_node
7146
    else:
7147
      self.feedback_fn("* instance confirmed to be running on its"
7148
                       " primary node (%s)" % source_node)
7149
      demoted_node = target_node
7150

    
7151
    if instance.disk_template in constants.DTS_INT_MIRROR:
7152
      self._EnsureSecondary(demoted_node)
7153
      try:
7154
        self._WaitUntilSync()
7155
      except errors.OpExecError:
7156
        # we ignore here errors, since if the device is standalone, it
7157
        # won't be able to sync
7158
        pass
7159
      self._GoStandalone()
7160
      self._GoReconnect(False)
7161
      self._WaitUntilSync()
7162

    
7163
    self.feedback_fn("* done")
7164

    
7165
  def _RevertDiskStatus(self):
7166
    """Try to revert the disk status after a failed migration.
7167

7168
    """
7169
    target_node = self.target_node
7170
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7171
      return
7172

    
7173
    try:
7174
      self._EnsureSecondary(target_node)
7175
      self._GoStandalone()
7176
      self._GoReconnect(False)
7177
      self._WaitUntilSync()
7178
    except errors.OpExecError, err:
7179
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7180
                         " please try to recover the instance manually;"
7181
                         " error '%s'" % str(err))
7182

    
7183
  def _AbortMigration(self):
7184
    """Call the hypervisor code to abort a started migration.
7185

7186
    """
7187
    instance = self.instance
7188
    target_node = self.target_node
7189
    migration_info = self.migration_info
7190

    
7191
    abort_result = self.rpc.call_finalize_migration(target_node,
7192
                                                    instance,
7193
                                                    migration_info,
7194
                                                    False)
7195
    abort_msg = abort_result.fail_msg
7196
    if abort_msg:
7197
      logging.error("Aborting migration failed on target node %s: %s",
7198
                    target_node, abort_msg)
7199
      # Don't raise an exception here, as we still have to try to revert the
7200
      # disk status, even if this step failed.
7201

    
7202
  def _ExecMigration(self):
7203
    """Migrate an instance.
7204

7205
    The migrate is done by:
7206
      - change the disks into dual-master mode
7207
      - wait until disks are fully synchronized again
7208
      - migrate the instance
7209
      - change disks on the new secondary node (the old primary) to secondary
7210
      - wait until disks are fully synchronized
7211
      - change disks into single-master mode
7212

7213
    """
7214
    instance = self.instance
7215
    target_node = self.target_node
7216
    source_node = self.source_node
7217

    
7218
    self.feedback_fn("* checking disk consistency between source and target")
7219
    for dev in instance.disks:
7220
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7221
        raise errors.OpExecError("Disk %s is degraded or not fully"
7222
                                 " synchronized on target node,"
7223
                                 " aborting migration" % dev.iv_name)
7224

    
7225
    # First get the migration information from the remote node
7226
    result = self.rpc.call_migration_info(source_node, instance)
7227
    msg = result.fail_msg
7228
    if msg:
7229
      log_err = ("Failed fetching source migration information from %s: %s" %
7230
                 (source_node, msg))
7231
      logging.error(log_err)
7232
      raise errors.OpExecError(log_err)
7233

    
7234
    self.migration_info = migration_info = result.payload
7235

    
7236
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7237
      # Then switch the disks to master/master mode
7238
      self._EnsureSecondary(target_node)
7239
      self._GoStandalone()
7240
      self._GoReconnect(True)
7241
      self._WaitUntilSync()
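    # For internally mirrored (DRBD) templates the disks are now in
    # dual-master mode, so both the source and the target node can access
    # them while the hypervisor copies the instance over; externally
    # mirrored templates skip this step entirely.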
7242

    
7243
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7244
    result = self.rpc.call_accept_instance(target_node,
7245
                                           instance,
7246
                                           migration_info,
7247
                                           self.nodes_ip[target_node])
7248

    
7249
    msg = result.fail_msg
7250
    if msg:
7251
      logging.error("Instance pre-migration failed, trying to revert"
7252
                    " disk status: %s", msg)
7253
      self.feedback_fn("Pre-migration failed, aborting")
7254
      self._AbortMigration()
7255
      self._RevertDiskStatus()
7256
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7257
                               (instance.name, msg))
7258

    
7259
    self.feedback_fn("* migrating instance to %s" % target_node)
7260
    result = self.rpc.call_instance_migrate(source_node, instance,
7261
                                            self.nodes_ip[target_node],
7262
                                            self.live)
7263
    msg = result.fail_msg
7264
    if msg:
7265
      logging.error("Instance migration failed, trying to revert"
7266
                    " disk status: %s", msg)
7267
      self.feedback_fn("Migration failed, aborting")
7268
      self._AbortMigration()
7269
      self._RevertDiskStatus()
7270
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7271
                               (instance.name, msg))
7272

    
7273
    instance.primary_node = target_node
7274
    # distribute new instance config to the other nodes
7275
    self.cfg.Update(instance, self.feedback_fn)
7276

    
7277
    result = self.rpc.call_finalize_migration(target_node,
7278
                                              instance,
7279
                                              migration_info,
7280
                                              True)
7281
    msg = result.fail_msg
7282
    if msg:
7283
      logging.error("Instance migration succeeded, but finalization failed:"
7284
                    " %s", msg)
7285
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7286
                               msg)
7287

    
7288
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7289
      self._EnsureSecondary(source_node)
7290
      self._WaitUntilSync()
7291
      self._GoStandalone()
7292
      self._GoReconnect(False)
7293
      self._WaitUntilSync()
7294

    
7295
    self.feedback_fn("* done")
7296

    
7297
  def _ExecFailover(self):
7298
    """Failover an instance.
7299

7300
    The failover is done by shutting it down on its present node and
7301
    starting it on the secondary.
7302

7303
    """
7304
    instance = self.instance
7305
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7306

    
7307
    source_node = instance.primary_node
7308
    target_node = self.target_node
7309

    
7310
    if instance.admin_up:
7311
      self.feedback_fn("* checking disk consistency between source and target")
7312
      for dev in instance.disks:
7313
        # for drbd, these are drbd over lvm
7314
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7315
          if primary_node.offline:
7316
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7317
                             " target node %s" %
7318
                             (primary_node.name, dev.iv_name, target_node))
7319
          elif not self.ignore_consistency:
7320
            raise errors.OpExecError("Disk %s is degraded on target node,"
7321
                                     " aborting failover" % dev.iv_name)
7322
    else:
7323
      self.feedback_fn("* not checking disk consistency as instance is not"
7324
                       " running")
7325

    
7326
    self.feedback_fn("* shutting down instance on source node")
7327
    logging.info("Shutting down instance %s on node %s",
7328
                 instance.name, source_node)
7329

    
7330
    result = self.rpc.call_instance_shutdown(source_node, instance,
7331
                                             self.shutdown_timeout)
7332
    msg = result.fail_msg
7333
    if msg:
7334
      if self.ignore_consistency or primary_node.offline:
7335
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7336
                           " proceeding anyway; please make sure node"
7337
                           " %s is down; error details: %s",
7338
                           instance.name, source_node, source_node, msg)
7339
      else:
7340
        raise errors.OpExecError("Could not shutdown instance %s on"
7341
                                 " node %s: %s" %
7342
                                 (instance.name, source_node, msg))
7343

    
7344
    self.feedback_fn("* deactivating the instance's disks on source node")
7345
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7346
      raise errors.OpExecError("Can't shut down the instance's disks")
7347

    
7348
    instance.primary_node = target_node
7349
    # distribute new instance config to the other nodes
7350
    self.cfg.Update(instance, self.feedback_fn)
7351

    
7352
    # Only start the instance if it's marked as up
7353
    if instance.admin_up:
7354
      self.feedback_fn("* activating the instance's disks on target node %s" %
7355
                       target_node)
7356
      logging.info("Starting instance %s on node %s",
7357
                   instance.name, target_node)
7358

    
7359
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7360
                                           ignore_secondaries=True)
7361
      if not disks_ok:
7362
        _ShutdownInstanceDisks(self.lu, instance)
7363
        raise errors.OpExecError("Can't activate the instance's disks")
7364

    
7365
      self.feedback_fn("* starting the instance on the target node %s" %
7366
                       target_node)
7367
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7368
                                            False)
7369
      msg = result.fail_msg
7370
      if msg:
7371
        _ShutdownInstanceDisks(self.lu, instance)
7372
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7373
                                 (instance.name, target_node, msg))
7374

    
7375
  def Exec(self, feedback_fn):
7376
    """Perform the migration.
7377

7378
    """
7379
    self.feedback_fn = feedback_fn
7380
    self.source_node = self.instance.primary_node
7381

    
7382
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7383
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7384
      self.target_node = self.instance.secondary_nodes[0]
7385
      # Otherwise self.target_node has been populated either
7386
      # directly, or through an iallocator.
7387

    
7388
    self.all_nodes = [self.source_node, self.target_node]
7389
    self.nodes_ip = {
7390
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7391
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7392
      }
7393

    
7394
    if self.failover:
7395
      feedback_fn("Failover instance %s" % self.instance.name)
7396
      self._ExecFailover()
7397
    else:
7398
      feedback_fn("Migrating instance %s" % self.instance.name)
7399

    
7400
      if self.cleanup:
7401
        return self._ExecCleanup()
7402
      else:
7403
        return self._ExecMigration()
7404

    
7405

    
7406
def _CreateBlockDev(lu, node, instance, device, force_create,
7407
                    info, force_open):
7408
  """Create a tree of block devices on a given node.
7409

7410
  If this device type has to be created on secondaries, create it and
7411
  all its children.
7412

7413
  If not, just recurse to children keeping the same 'force' value.
7414

7415
  @param lu: the lu on whose behalf we execute
7416
  @param node: the node on which to create the device
7417
  @type instance: L{objects.Instance}
7418
  @param instance: the instance which owns the device
7419
  @type device: L{objects.Disk}
7420
  @param device: the device to create
7421
  @type force_create: boolean
7422
  @param force_create: whether to force creation of this device; this
7423
      will be changed to True whenever we find a device which has
7424
      CreateOnSecondary() attribute
7425
  @param info: the extra 'metadata' we should attach to the device
7426
      (this will be represented as a LVM tag)
7427
  @type force_open: boolean
7428
  @param force_open: this parameter will be passed to the
7429
      L{backend.BlockdevCreate} function where it specifies
7430
      whether we run on primary or not, and it affects both
7431
      the child assembly and the device's own Open() execution
7432

7433
  """
7434
  if device.CreateOnSecondary():
7435
    force_create = True
7436

    
7437
  if device.children:
7438
    for child in device.children:
7439
      _CreateBlockDev(lu, node, instance, child, force_create,
7440
                      info, force_open)
7441

    
7442
  if not force_create:
7443
    return
7444

    
7445
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
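  # Illustrative order for a DRBD8 disk (cf. _GenerateDRBD8Branch below):
  # the data and metadata LVs (the children) are created first, then the
  # DRBD device on top of them; force_create is turned on for a device and
  # its children whenever that device reports CreateOnSecondary().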
7446

    
7447

    
7448
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7449
  """Create a single block device on a given node.
7450

7451
  This will not recurse over children of the device, so they must be
7452
  created in advance.
7453

7454
  @param lu: the lu on whose behalf we execute
7455
  @param node: the node on which to create the device
7456
  @type instance: L{objects.Instance}
7457
  @param instance: the instance which owns the device
7458
  @type device: L{objects.Disk}
7459
  @param device: the device to create
7460
  @param info: the extra 'metadata' we should attach to the device
7461
      (this will be represented as a LVM tag)
7462
  @type force_open: boolean
7463
  @param force_open: this parameter will be passed to the
7464
      L{backend.BlockdevCreate} function where it specifies
7465
      whether we run on primary or not, and it affects both
7466
      the child assembly and the device's own Open() execution
7467

7468
  """
7469
  lu.cfg.SetDiskID(device, node)
7470
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7471
                                       instance.name, force_open, info)
7472
  result.Raise("Can't create block device %s on"
7473
               " node %s for instance %s" % (device, node, instance.name))
7474
  if device.physical_id is None:
7475
    device.physical_id = result.payload
7476

    
7477

    
7478
def _GenerateUniqueNames(lu, exts):
7479
  """Generate a suitable LV name.
7480

7481
  This will generate logical volume names for the given instance.
7482

7483
  """
7484
  results = []
7485
  for val in exts:
7486
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7487
    results.append("%s%s" % (new_id, val))
7488
  return results
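  # Example (hypothetical IDs): _GenerateUniqueNames(lu, [".disk0", ".disk1"])
  # would return something like ["<unique-id-1>.disk0",
  # "<unique-id-2>.disk1"], i.e. a freshly generated unique ID per suffix.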
7489

    
7490

    
7491
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7492
                         iv_name, p_minor, s_minor):
7493
  """Generate a drbd8 device complete with its children.
7494

7495
  """
7496
  assert len(vgnames) == len(names) == 2
7497
  port = lu.cfg.AllocatePort()
7498
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7499
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7500
                          logical_id=(vgnames[0], names[0]))
7501
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7502
                          logical_id=(vgnames[1], names[1]))
7503
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7504
                          logical_id=(primary, secondary, port,
7505
                                      p_minor, s_minor,
7506
                                      shared_secret),
7507
                          children=[dev_data, dev_meta],
7508
                          iv_name=iv_name)
7509
  return drbd_dev
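  # The resulting device tree, roughly:
  #
  #   DRBD8 (primary, secondary, port, p_minor, s_minor, shared_secret)
  #     |- LV data (vgnames[0], names[0], 'size' MB)
  #     `- LV meta (vgnames[1], names[1], 128 MB)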
7510

    
7511

    
7512
def _GenerateDiskTemplate(lu, template_name,
7513
                          instance_name, primary_node,
7514
                          secondary_nodes, disk_info,
7515
                          file_storage_dir, file_driver,
7516
                          base_index, feedback_fn):
7517
  """Generate the entire disk layout for a given template type.
7518

7519
  """
7520
  #TODO: compute space requirements
7521

    
7522
  vgname = lu.cfg.GetVGName()
7523
  disk_count = len(disk_info)
7524
  disks = []
7525
  if template_name == constants.DT_DISKLESS:
7526
    pass
7527
  elif template_name == constants.DT_PLAIN:
7528
    if len(secondary_nodes) != 0:
7529
      raise errors.ProgrammerError("Wrong template configuration")
7530

    
7531
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7532
                                      for i in range(disk_count)])
7533
    for idx, disk in enumerate(disk_info):
7534
      disk_index = idx + base_index
7535
      vg = disk.get(constants.IDISK_VG, vgname)
7536
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7537
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7538
                              size=disk[constants.IDISK_SIZE],
7539
                              logical_id=(vg, names[idx]),
7540
                              iv_name="disk/%d" % disk_index,
7541
                              mode=disk[constants.IDISK_MODE])
7542
      disks.append(disk_dev)
7543
  elif template_name == constants.DT_DRBD8:
7544
    if len(secondary_nodes) != 1:
7545
      raise errors.ProgrammerError("Wrong template configuration")
7546
    remote_node = secondary_nodes[0]
7547
    minors = lu.cfg.AllocateDRBDMinor(
7548
      [primary_node, remote_node] * len(disk_info), instance_name)
7549

    
7550
    names = []
7551
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7552
                                               for i in range(disk_count)]):
7553
      names.append(lv_prefix + "_data")
7554
      names.append(lv_prefix + "_meta")
7555
    for idx, disk in enumerate(disk_info):
7556
      disk_index = idx + base_index
7557
      data_vg = disk.get(constants.IDISK_VG, vgname)
7558
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7559
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7560
                                      disk[constants.IDISK_SIZE],
7561
                                      [data_vg, meta_vg],
7562
                                      names[idx * 2:idx * 2 + 2],
7563
                                      "disk/%d" % disk_index,
7564
                                      minors[idx * 2], minors[idx * 2 + 1])
7565
      disk_dev.mode = disk[constants.IDISK_MODE]
7566
      disks.append(disk_dev)
7567
  elif template_name == constants.DT_FILE:
7568
    if len(secondary_nodes) != 0:
7569
      raise errors.ProgrammerError("Wrong template configuration")
7570

    
7571
    opcodes.RequireFileStorage()
7572

    
7573
    for idx, disk in enumerate(disk_info):
7574
      disk_index = idx + base_index
7575
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7576
                              size=disk[constants.IDISK_SIZE],
7577
                              iv_name="disk/%d" % disk_index,
7578
                              logical_id=(file_driver,
7579
                                          "%s/disk%d" % (file_storage_dir,
7580
                                                         disk_index)),
7581
                              mode=disk[constants.IDISK_MODE])
7582
      disks.append(disk_dev)
7583
  elif template_name == constants.DT_SHARED_FILE:
7584
    if len(secondary_nodes) != 0:
7585
      raise errors.ProgrammerError("Wrong template configuration")
7586

    
7587
    opcodes.RequireSharedFileStorage()
7588

    
7589
    for idx, disk in enumerate(disk_info):
7590
      disk_index = idx + base_index
7591
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7592
                              size=disk[constants.IDISK_SIZE],
7593
                              iv_name="disk/%d" % disk_index,
7594
                              logical_id=(file_driver,
7595
                                          "%s/disk%d" % (file_storage_dir,
7596
                                                         disk_index)),
7597
                              mode=disk[constants.IDISK_MODE])
7598
      disks.append(disk_dev)
7599
  elif template_name == constants.DT_BLOCK:
7600
    if len(secondary_nodes) != 0:
7601
      raise errors.ProgrammerError("Wrong template configuration")
7602

    
7603
    for idx, disk in enumerate(disk_info):
7604
      disk_index = idx + base_index
7605
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7606
                              size=disk[constants.IDISK_SIZE],
7607
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7608
                                          disk[constants.IDISK_ADOPT]),
7609
                              iv_name="disk/%d" % disk_index,
7610
                              mode=disk[constants.IDISK_MODE])
7611
      disks.append(disk_dev)
7612

    
7613
  else:
7614
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7615
  return disks
7616

    
7617

    
7618
def _GetInstanceInfoText(instance):
7619
  """Compute that text that should be added to the disk's metadata.
7620

7621
  """
7622
  return "originstname+%s" % instance.name
7623

    
7624

    
7625
def _CalcEta(time_taken, written, total_size):
7626
  """Calculates the ETA based on size written and total size.
7627

7628
  @param time_taken: The time taken so far
7629
  @param written: amount written so far
7630
  @param total_size: The total size of data to be written
7631
  @return: The remaining time in seconds
7632

7633
  """
7634
  avg_time = time_taken / float(written)
7635
  return (total_size - written) * avg_time
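  # Worked example (illustrative numbers): with 1024 MB written in 120
  # seconds out of 4096 MB total, avg_time is 120/1024 s/MB and the
  # remaining 3072 MB give an ETA of roughly 360 seconds.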
7636

    
7637

    
7638
def _WipeDisks(lu, instance):
7639
  """Wipes instance disks.
7640

7641
  @type lu: L{LogicalUnit}
7642
  @param lu: the logical unit on whose behalf we execute
7643
  @type instance: L{objects.Instance}
7644
  @param instance: the instance whose disks we should wipe
7645
  @return: the success of the wipe
7646

7647
  """
7648
  node = instance.primary_node
7649

    
7650
  for device in instance.disks:
7651
    lu.cfg.SetDiskID(device, node)
7652

    
7653
  logging.info("Pause sync of instance %s disks", instance.name)
7654
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7655

    
7656
  for idx, success in enumerate(result.payload):
7657
    if not success:
7658
      logging.warn("pause-sync of instance %s for disks %d failed",
7659
                   instance.name, idx)
7660

    
7661
  try:
7662
    for idx, device in enumerate(instance.disks):
7663
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7664
      # MAX_WIPE_CHUNK at max
7665
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7666
                            constants.MIN_WIPE_CHUNK_PERCENT)
7667
      # we _must_ make this an int, otherwise rounding errors will
7668
      # occur
7669
      wipe_chunk_size = int(wipe_chunk_size)
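      # E.g. assuming MIN_WIPE_CHUNK_PERCENT were 10 and MAX_WIPE_CHUNK were
      # 1024 MB (illustrative values only), a 2048 MB disk would be wiped in
      # 204 MB chunks and a 100 GB disk in 1024 MB chunks.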
7670

    
7671
      lu.LogInfo("* Wiping disk %d", idx)
7672
      logging.info("Wiping disk %d for instance %s, node %s using"
7673
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7674

    
7675
      offset = 0
7676
      size = device.size
7677
      last_output = 0
7678
      start_time = time.time()
7679

    
7680
      while offset < size:
7681
        wipe_size = min(wipe_chunk_size, size - offset)
7682
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7683
                      idx, offset, wipe_size)
7684
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7685
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7686
                     (idx, offset, wipe_size))
7687
        now = time.time()
7688
        offset += wipe_size
7689
        if now - last_output >= 60:
7690
          eta = _CalcEta(now - start_time, offset, size)
7691
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7692
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7693
          last_output = now
7694
  finally:
7695
    logging.info("Resume sync of instance %s disks", instance.name)
7696

    
7697
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7698

    
7699
    for idx, success in enumerate(result.payload):
7700
      if not success:
7701
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7702
                      " look at the status and troubleshoot the issue", idx)
7703
        logging.warn("resume-sync of instance %s for disks %d failed",
7704
                     instance.name, idx)
7705

    
7706

    
7707
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7708
  """Create all disks for an instance.
7709

7710
  This abstracts away some work from AddInstance.
7711

7712
  @type lu: L{LogicalUnit}
7713
  @param lu: the logical unit on whose behalf we execute
7714
  @type instance: L{objects.Instance}
7715
  @param instance: the instance whose disks we should create
7716
  @type to_skip: list
7717
  @param to_skip: list of indices to skip
7718
  @type target_node: string
7719
  @param target_node: if passed, overrides the target node for creation
7720
  @rtype: boolean
7721
  @return: the success of the creation
7722

7723
  """
7724
  info = _GetInstanceInfoText(instance)
7725
  if target_node is None:
7726
    pnode = instance.primary_node
7727
    all_nodes = instance.all_nodes
7728
  else:
7729
    pnode = target_node
7730
    all_nodes = [pnode]
7731

    
7732
  if instance.disk_template in constants.DTS_FILEBASED:
7733
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7734
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7735

    
7736
    result.Raise("Failed to create directory '%s' on"
7737
                 " node %s" % (file_storage_dir, pnode))
7738

    
7739
  # Note: this needs to be kept in sync with adding of disks in
7740
  # LUInstanceSetParams
7741
  for idx, device in enumerate(instance.disks):
7742
    if to_skip and idx in to_skip:
7743
      continue
7744
    logging.info("Creating volume %s for instance %s",
7745
                 device.iv_name, instance.name)
7746
    #HARDCODE
7747
    for node in all_nodes:
7748
      f_create = node == pnode
7749
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
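      # Note that f_create doubles as force_open here, so devices are only
      # forcibly created/opened on the primary (or explicitly given target)
      # node; other nodes rely on _CreateBlockDev's CreateOnSecondary()
      # handling.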
7750

    
7751

    
7752
def _RemoveDisks(lu, instance, target_node=None):
7753
  """Remove all disks for an instance.
7754

7755
  This abstracts away some work from `AddInstance()` and
7756
  `RemoveInstance()`. Note that in case some of the devices couldn't
7757
  be removed, the removal will continue with the other ones (compare
7758
  with `_CreateDisks()`).
7759

7760
  @type lu: L{LogicalUnit}
7761
  @param lu: the logical unit on whose behalf we execute
7762
  @type instance: L{objects.Instance}
7763
  @param instance: the instance whose disks we should remove
7764
  @type target_node: string
7765
  @param target_node: used to override the node on which to remove the disks
7766
  @rtype: boolean
7767
  @return: the success of the removal
7768

7769
  """
7770
  logging.info("Removing block devices for instance %s", instance.name)
7771

    
7772
  all_result = True
7773
  for device in instance.disks:
7774
    if target_node:
7775
      edata = [(target_node, device)]
7776
    else:
7777
      edata = device.ComputeNodeTree(instance.primary_node)
7778
    for node, disk in edata:
7779
      lu.cfg.SetDiskID(disk, node)
7780
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7781
      if msg:
7782
        lu.LogWarning("Could not remove block device %s on node %s,"
7783
                      " continuing anyway: %s", device.iv_name, node, msg)
7784
        all_result = False
7785

    
7786
  if instance.disk_template == constants.DT_FILE:
7787
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7788
    if target_node:
7789
      tgt = target_node
7790
    else:
7791
      tgt = instance.primary_node
7792
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7793
    if result.fail_msg:
7794
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7795
                    file_storage_dir, tgt, result.fail_msg)
7796
      all_result = False
7797

    
7798
  return all_result
7799

    
7800

    
7801
def _ComputeDiskSizePerVG(disk_template, disks):
7802
  """Compute disk size requirements in the volume group
7803

7804
  """
7805
  def _compute(disks, payload):
7806
    """Universal algorithm.
7807

7808
    """
7809
    vgs = {}
7810
    for disk in disks:
7811
      vgs[disk[constants.IDISK_VG]] = \
7812
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload
7813

    
7814
    return vgs
7815

    
7816
  # Required free disk space as a function of disk and swap space
7817
  req_size_dict = {
7818
    constants.DT_DISKLESS: {},
7819
    constants.DT_PLAIN: _compute(disks, 0),
7820
    # 128 MB are added for drbd metadata for each disk
7821
    constants.DT_DRBD8: _compute(disks, 128),
7822
    constants.DT_FILE: {},
7823
    constants.DT_SHARED_FILE: {},
7824
  }
7825

    
7826
  if disk_template not in req_size_dict:
7827
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7828
                                 " is unknown" %  disk_template)
7829

    
7830
  return req_size_dict[disk_template]
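  # Example (illustrative): two DRBD8 disks of 10240 MB and 2048 MB in the
  # same volume group "xenvg" yield {"xenvg": 10240 + 128 + 2048 + 128},
  # i.e. {"xenvg": 12544}.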
7831

    
7832

    
7833
def _ComputeDiskSize(disk_template, disks):
7834
  """Compute disk size requirements in the volume group
7835

7836
  """
7837
  # Required free disk space as a function of disk and swap space
7838
  req_size_dict = {
7839
    constants.DT_DISKLESS: None,
7840
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7841
    # 128 MB are added for drbd metadata for each disk
7842
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7843
    constants.DT_FILE: None,
7844
    constants.DT_SHARED_FILE: 0,
7845
    constants.DT_BLOCK: 0,
7846
  }
7847

    
7848
  if disk_template not in req_size_dict:
7849
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7850
                                 " is unknown" %  disk_template)
7851

    
7852
  return req_size_dict[disk_template]
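  # Example: 10240 MB and 2048 MB disks need 12288 MB under DT_PLAIN and
  # 12544 MB under DT_DRBD8 (128 MB of metadata per disk), while file-based
  # and diskless templates report no LVM space requirement.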
7853

    
7854

    
7855
def _FilterVmNodes(lu, nodenames):
7856
  """Filters out non-vm_capable nodes from a list.
7857

7858
  @type lu: L{LogicalUnit}
7859
  @param lu: the logical unit for which we check
7860
  @type nodenames: list
7861
  @param nodenames: the list of nodes on which we should check
7862
  @rtype: list
7863
  @return: the list of vm-capable nodes
7864

7865
  """
7866
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7867
  return [name for name in nodenames if name not in non_vm_nodes]
7868

    
7869

    
7870
def _CheckHVParams(lu, nodenames, hvname, hvparams):
7871
  """Hypervisor parameter validation.
7872

7873
  This function abstract the hypervisor parameter validation to be
7874
  used in both instance create and instance modify.
7875

7876
  @type lu: L{LogicalUnit}
7877
  @param lu: the logical unit for which we check
7878
  @type nodenames: list
7879
  @param nodenames: the list of nodes on which we should check
7880
  @type hvname: string
7881
  @param hvname: the name of the hypervisor we should use
7882
  @type hvparams: dict
7883
  @param hvparams: the parameters which we need to check
7884
  @raise errors.OpPrereqError: if the parameters are not valid
7885

7886
  """
7887
  nodenames = _FilterVmNodes(lu, nodenames)
7888
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7889
                                                  hvname,
7890
                                                  hvparams)
7891
  for node in nodenames:
7892
    info = hvinfo[node]
7893
    if info.offline:
7894
      continue
7895
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
7896

    
7897

    
7898
def _CheckOSParams(lu, required, nodenames, osname, osparams):
7899
  """OS parameters validation.
7900

7901
  @type lu: L{LogicalUnit}
7902
  @param lu: the logical unit for which we check
7903
  @type required: boolean
7904
  @param required: whether the validation should fail if the OS is not
7905
      found
7906
  @type nodenames: list
7907
  @param nodenames: the list of nodes on which we should check
7908
  @type osname: string
7909
  @param osname: the name of the OS we should use
7910
  @type osparams: dict
7911
  @param osparams: the parameters which we need to check
7912
  @raise errors.OpPrereqError: if the parameters are not valid
7913

7914
  """
7915
  nodenames = _FilterVmNodes(lu, nodenames)
7916
  result = lu.rpc.call_os_validate(required, nodenames, osname,
7917
                                   [constants.OS_VALIDATE_PARAMETERS],
7918
                                   osparams)
7919
  for node, nres in result.items():
7920
    # we don't check for offline cases since this should be run only
7921
    # against the master node and/or an instance's nodes
7922
    nres.Raise("OS Parameters validation failed on node %s" % node)
7923
    if not nres.payload:
7924
      lu.LogInfo("OS %s not found on node %s, validation skipped",
7925
                 osname, node)
7926

    
7927

    
7928
class LUInstanceCreate(LogicalUnit):
7929
  """Create an instance.
7930

7931
  """
7932
  HPATH = "instance-add"
7933
  HTYPE = constants.HTYPE_INSTANCE
7934
  REQ_BGL = False
7935

    
7936
  def CheckArguments(self):
7937
    """Check arguments.
7938

7939
    """
7940
    # do not require name_check to ease forward/backward compatibility
7941
    # for tools
7942
    if self.op.no_install and self.op.start:
7943
      self.LogInfo("No-installation mode selected, disabling startup")
7944
      self.op.start = False
7945
    # validate/normalize the instance name
7946
    self.op.instance_name = \
7947
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7948

    
7949
    if self.op.ip_check and not self.op.name_check:
7950
      # TODO: make the ip check more flexible and not depend on the name check
7951
      raise errors.OpPrereqError("Cannot do IP address check without a name"
7952
                                 " check", errors.ECODE_INVAL)
7953

    
7954
    # check nics' parameter names
7955
    for nic in self.op.nics:
7956
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7957

    
7958
    # check disks. parameter names and consistent adopt/no-adopt strategy
7959
    has_adopt = has_no_adopt = False
7960
    for disk in self.op.disks:
7961
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7962
      if constants.IDISK_ADOPT in disk:
7963
        has_adopt = True
7964
      else:
7965
        has_no_adopt = True
7966
    if has_adopt and has_no_adopt:
7967
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7968
                                 errors.ECODE_INVAL)
7969
    if has_adopt:
7970
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7971
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7972
                                   " '%s' disk template" %
7973
                                   self.op.disk_template,
7974
                                   errors.ECODE_INVAL)
7975
      if self.op.iallocator is not None:
7976
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7977
                                   " iallocator script", errors.ECODE_INVAL)
7978
      if self.op.mode == constants.INSTANCE_IMPORT:
7979
        raise errors.OpPrereqError("Disk adoption not allowed for"
7980
                                   " instance import", errors.ECODE_INVAL)
7981
    else:
7982
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
7983
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7984
                                   " but no 'adopt' parameter given" %
7985
                                   self.op.disk_template,
7986
                                   errors.ECODE_INVAL)
7987

    
7988
    self.adopt_disks = has_adopt
7989

    
7990
    # instance name verification
7991
    if self.op.name_check:
7992
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7993
      self.op.instance_name = self.hostname1.name
7994
      # used in CheckPrereq for ip ping check
7995
      self.check_ip = self.hostname1.ip
7996
    else:
7997
      self.check_ip = None
7998

    
7999
    # file storage checks
8000
    if (self.op.file_driver and
8001
        self.op.file_driver not in constants.FILE_DRIVER):
8002
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8003
                                 self.op.file_driver, errors.ECODE_INVAL)
8004

    
8005
    if self.op.disk_template == constants.DT_FILE:
8006
      opcodes.RequireFileStorage()
8007
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8008
      opcodes.RequireSharedFileStorage()
8009

    
8010
    ### Node/iallocator related checks
8011
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8012

    
8013
    if self.op.pnode is not None:
8014
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8015
        if self.op.snode is None:
8016
          raise errors.OpPrereqError("The networked disk templates need"
8017
                                     " a mirror node", errors.ECODE_INVAL)
8018
      elif self.op.snode:
8019
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8020
                        " template")
8021
        self.op.snode = None
8022

    
8023
    self._cds = _GetClusterDomainSecret()
8024

    
8025
    if self.op.mode == constants.INSTANCE_IMPORT:
8026
      # On import force_variant must be True, because if we forced it at
8027
      # initial install, our only chance when importing it back is that it
8028
      # works again!
8029
      self.op.force_variant = True
8030

    
8031
      if self.op.no_install:
8032
        self.LogInfo("No-installation mode has no effect during import")
8033

    
8034
    elif self.op.mode == constants.INSTANCE_CREATE:
8035
      if self.op.os_type is None:
8036
        raise errors.OpPrereqError("No guest OS specified",
8037
                                   errors.ECODE_INVAL)
8038
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8039
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8040
                                   " installation" % self.op.os_type,
8041
                                   errors.ECODE_STATE)
8042
      if self.op.disk_template is None:
8043
        raise errors.OpPrereqError("No disk template specified",
8044
                                   errors.ECODE_INVAL)
8045

    
8046
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8047
      # Check handshake to ensure both clusters have the same domain secret
8048
      src_handshake = self.op.source_handshake
8049
      if not src_handshake:
8050
        raise errors.OpPrereqError("Missing source handshake",
8051
                                   errors.ECODE_INVAL)
8052

    
8053
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8054
                                                           src_handshake)
8055
      if errmsg:
8056
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8057
                                   errors.ECODE_INVAL)
8058

    
8059
      # Load and check source CA
8060
      self.source_x509_ca_pem = self.op.source_x509_ca
8061
      if not self.source_x509_ca_pem:
8062
        raise errors.OpPrereqError("Missing source X509 CA",
8063
                                   errors.ECODE_INVAL)
8064

    
8065
      try:
8066
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8067
                                                    self._cds)
8068
      except OpenSSL.crypto.Error, err:
8069
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8070
                                   (err, ), errors.ECODE_INVAL)
8071

    
8072
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8073
      if errcode is not None:
8074
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8075
                                   errors.ECODE_INVAL)
8076

    
8077
      self.source_x509_ca = cert
8078

    
8079
      src_instance_name = self.op.source_instance_name
8080
      if not src_instance_name:
8081
        raise errors.OpPrereqError("Missing source instance name",
8082
                                   errors.ECODE_INVAL)
8083

    
8084
      self.source_instance_name = \
8085
          netutils.GetHostname(name=src_instance_name).name
8086

    
8087
    else:
8088
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8089
                                 self.op.mode, errors.ECODE_INVAL)
8090

    
8091
  def ExpandNames(self):
8092
    """ExpandNames for CreateInstance.
8093

8094
    Figure out the right locks for instance creation.
8095

8096
    """
8097
    self.needed_locks = {}
8098

    
8099
    instance_name = self.op.instance_name
8100
    # this is just a preventive check, but someone might still add this
8101
    # instance in the meantime, and creation will fail at lock-add time
8102
    if instance_name in self.cfg.GetInstanceList():
8103
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8104
                                 instance_name, errors.ECODE_EXISTS)
8105

    
8106
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8107

    
8108
    if self.op.iallocator:
8109
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8110
    else:
8111
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8112
      nodelist = [self.op.pnode]
8113
      if self.op.snode is not None:
8114
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8115
        nodelist.append(self.op.snode)
8116
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8117

    
8118
    # in case of import lock the source node too
8119
    if self.op.mode == constants.INSTANCE_IMPORT:
8120
      src_node = self.op.src_node
8121
      src_path = self.op.src_path
8122

    
8123
      if src_path is None:
8124
        self.op.src_path = src_path = self.op.instance_name
8125

    
8126
      if src_node is None:
8127
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8128
        self.op.src_node = None
8129
        if os.path.isabs(src_path):
8130
          raise errors.OpPrereqError("Importing an instance from an absolute"
8131
                                     " path requires a source node option",
8132
                                     errors.ECODE_INVAL)
8133
      else:
8134
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8135
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8136
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8137
        if not os.path.isabs(src_path):
8138
          self.op.src_path = src_path = \
8139
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8140

    
8141
  def _RunAllocator(self):
8142
    """Run the allocator based on input opcode.
8143

8144
    """
8145
    nics = [n.ToDict() for n in self.nics]
8146
    ial = IAllocator(self.cfg, self.rpc,
8147
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8148
                     name=self.op.instance_name,
8149
                     disk_template=self.op.disk_template,
8150
                     tags=self.op.tags,
8151
                     os=self.op.os_type,
8152
                     vcpus=self.be_full[constants.BE_VCPUS],
8153
                     memory=self.be_full[constants.BE_MEMORY],
8154
                     disks=self.disks,
8155
                     nics=nics,
8156
                     hypervisor=self.op.hypervisor,
8157
                     )
8158

    
8159
    ial.Run(self.op.iallocator)
8160

    
8161
    if not ial.success:
8162
      raise errors.OpPrereqError("Can't compute nodes using"
8163
                                 " iallocator '%s': %s" %
8164
                                 (self.op.iallocator, ial.info),
8165
                                 errors.ECODE_NORES)
8166
    if len(ial.result) != ial.required_nodes:
8167
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8168
                                 " of nodes (%s), required %s" %
8169
                                 (self.op.iallocator, len(ial.result),
8170
                                  ial.required_nodes), errors.ECODE_FAULT)
8171
    self.op.pnode = ial.result[0]
8172
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8173
                 self.op.instance_name, self.op.iallocator,
8174
                 utils.CommaJoin(ial.result))
8175
    if ial.required_nodes == 2:
8176
      self.op.snode = ial.result[1]
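    # For mirrored disk templates the allocator is expected to return two
    # nodes (primary plus the DRBD secondary); otherwise a single node is
    # enough and self.op.snode is left untouched.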
8177

    
8178
  def BuildHooksEnv(self):
8179
    """Build hooks env.
8180

8181
    This runs on master, primary and secondary nodes of the instance.
8182

8183
    """
8184
    env = {
8185
      "ADD_MODE": self.op.mode,
8186
      }
8187
    if self.op.mode == constants.INSTANCE_IMPORT:
8188
      env["SRC_NODE"] = self.op.src_node
8189
      env["SRC_PATH"] = self.op.src_path
8190
      env["SRC_IMAGES"] = self.src_images
8191

    
8192
    env.update(_BuildInstanceHookEnv(
8193
      name=self.op.instance_name,
8194
      primary_node=self.op.pnode,
8195
      secondary_nodes=self.secondaries,
8196
      status=self.op.start,
8197
      os_type=self.op.os_type,
8198
      memory=self.be_full[constants.BE_MEMORY],
8199
      vcpus=self.be_full[constants.BE_VCPUS],
8200
      nics=_NICListToTuple(self, self.nics),
8201
      disk_template=self.op.disk_template,
8202
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8203
             for d in self.disks],
8204
      bep=self.be_full,
8205
      hvp=self.hv_full,
8206
      hypervisor_name=self.op.hypervisor,
8207
      tags=self.op.tags,
8208
    ))
8209

    
8210
    return env
8211

    
8212
  def BuildHooksNodes(self):
8213
    """Build hooks nodes.
8214

8215
    """
8216
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8217
    return nl, nl
8218

    
8219
  def _ReadExportInfo(self):
8220
    """Reads the export information from disk.
8221

8222
    It will override the opcode source node and path with the actual
8223
    information, if these two were not specified before.
8224

8225
    @return: the export information
8226

8227
    """
8228
    assert self.op.mode == constants.INSTANCE_IMPORT
8229

    
8230
    src_node = self.op.src_node
8231
    src_path = self.op.src_path
8232

    
8233
    if src_node is None:
8234
      locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8235
      exp_list = self.rpc.call_export_list(locked_nodes)
8236
      found = False
8237
      for node in exp_list:
8238
        if exp_list[node].fail_msg:
8239
          continue
8240
        if src_path in exp_list[node].payload:
8241
          found = True
8242
          self.op.src_node = src_node = node
8243
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8244
                                                       src_path)
8245
          break
8246
      if not found:
8247
        raise errors.OpPrereqError("No export found for relative path %s" %
8248
                                    src_path, errors.ECODE_INVAL)
8249

    
8250
    _CheckNodeOnline(self, src_node)
8251
    result = self.rpc.call_export_info(src_node, src_path)
8252
    result.Raise("No export or invalid export found in dir %s" % src_path)
8253

    
8254
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8255
    if not export_info.has_section(constants.INISECT_EXP):
8256
      raise errors.ProgrammerError("Corrupted export config",
8257
                                   errors.ECODE_ENVIRON)
8258

    
8259
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8260
    if int(ei_version) != constants.EXPORT_VERSION:
8261
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8262
                                 (ei_version, constants.EXPORT_VERSION),
8263
                                 errors.ECODE_ENVIRON)
8264
    return export_info
8265

    
8266
  def _ReadExportParams(self, einfo):
8267
    """Use export parameters as defaults.
8268

8269
    In case the opcode doesn't specify (as in override) some instance
8270
    parameters, then try to use them from the export information, if
8271
    that declares them.
8272

8273
    """
8274
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8275

    
8276
    if self.op.disk_template is None:
8277
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8278
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8279
                                          "disk_template")
8280
      else:
8281
        raise errors.OpPrereqError("No disk template specified and the export"
8282
                                   " is missing the disk_template information",
8283
                                   errors.ECODE_INVAL)
8284

    
8285
    if not self.op.disks:
8286
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
8287
        disks = []
8288
        # TODO: import the disk iv_name too
8289
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8290
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8291
          disks.append({constants.IDISK_SIZE: disk_sz})
8292
        self.op.disks = disks
8293
      else:
8294
        raise errors.OpPrereqError("No disk info specified and the export"
8295
                                   " is missing the disk information",
8296
                                   errors.ECODE_INVAL)
8297

    
8298
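    # rebuild the NIC definitions from the nic<N>_<param> entries of the
    # export, unless NICs were given explicitly in the opcode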
    if (not self.op.nics and
8299
        einfo.has_option(constants.INISECT_INS, "nic_count")):
8300
      nics = []
8301
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8302
        ndict = {}
8303
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8304
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8305
          ndict[name] = v
8306
        nics.append(ndict)
8307
      self.op.nics = nics
8308

    
8309
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8310
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8311

    
8312
    if (self.op.hypervisor is None and
8313
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8314
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8315

    
8316
    if einfo.has_section(constants.INISECT_HYP):
8317
      # use the export parameters but do not override the ones
8318
      # specified by the user
8319
      for name, value in einfo.items(constants.INISECT_HYP):
8320
        if name not in self.op.hvparams:
8321
          self.op.hvparams[name] = value
8322

    
8323
    if einfo.has_section(constants.INISECT_BEP):
8324
      # use the parameters, without overriding
8325
      for name, value in einfo.items(constants.INISECT_BEP):
8326
        if name not in self.op.beparams:
8327
          self.op.beparams[name] = value
8328
    else:
8329
      # try to read the parameters old style, from the main section
8330
      for name in constants.BES_PARAMETERS:
8331
        if (name not in self.op.beparams and
8332
            einfo.has_option(constants.INISECT_INS, name)):
8333
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8334

    
8335
    if einfo.has_section(constants.INISECT_OSP):
8336
      # use the parameters, without overriding
8337
      for name, value in einfo.items(constants.INISECT_OSP):
8338
        if name not in self.op.osparams:
8339
          self.op.osparams[name] = value
8340

    
8341
  def _RevertToDefaults(self, cluster):
8342
    """Revert the instance parameters to the default values.
8343

    Any explicitly given parameter whose value equals the cluster
    default is removed, so the instance keeps tracking the cluster
    default instead of storing its own copy.
8344
    """
8345
    # hvparams
8346
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8347
    for name in self.op.hvparams.keys():
8348
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8349
        del self.op.hvparams[name]
8350
    # beparams
8351
    be_defs = cluster.SimpleFillBE({})
8352
    for name in self.op.beparams.keys():
8353
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8354
        del self.op.beparams[name]
8355
    # nic params
8356
    nic_defs = cluster.SimpleFillNIC({})
8357
    for nic in self.op.nics:
8358
      for name in constants.NICS_PARAMETERS:
8359
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8360
          del nic[name]
8361
    # osparams
8362
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8363
    for name in self.op.osparams.keys():
8364
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8365
        del self.op.osparams[name]
8366

    
8367
  def _CalculateFileStorageDir(self):
8368
    """Calculate final instance file storage dir.
8369

8370
    """
8371
    # file storage dir calculation/check
8372
    self.instance_file_storage_dir = None
8373
    if self.op.disk_template in constants.DTS_FILEBASED:
8374
      # build the full file storage dir path
8375
      joinargs = []
8376

    
8377
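      # the base directory comes from the cluster configuration and
      # differs between file and shared-file storage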
      if self.op.disk_template == constants.DT_SHARED_FILE:
8378
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8379
      else:
8380
        get_fsd_fn = self.cfg.GetFileStorageDir
8381

    
8382
      cfg_storagedir = get_fsd_fn()
8383
      if not cfg_storagedir:
8384
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8385
      joinargs.append(cfg_storagedir)
8386

    
8387
      if self.op.file_storage_dir is not None:
8388
        joinargs.append(self.op.file_storage_dir)
8389

    
8390
      joinargs.append(self.op.instance_name)
8391

    
8392
      # pylint: disable-msg=W0142
8393
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8394

    
8395
  def CheckPrereq(self):
8396
    """Check prerequisites.
8397

8398
    """
8399
    self._CalculateFileStorageDir()
8400

    
8401
    if self.op.mode == constants.INSTANCE_IMPORT:
8402
      export_info = self._ReadExportInfo()
8403
      self._ReadExportParams(export_info)
8404

    
8405
    if (not self.cfg.GetVGName() and
8406
        self.op.disk_template not in constants.DTS_NOT_LVM):
8407
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8408
                                 " instances", errors.ECODE_STATE)
8409

    
8410
    if self.op.hypervisor is None:
8411
      self.op.hypervisor = self.cfg.GetHypervisorType()
8412

    
8413
    cluster = self.cfg.GetClusterInfo()
8414
    enabled_hvs = cluster.enabled_hypervisors
8415
    if self.op.hypervisor not in enabled_hvs:
8416
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8417
                                 " cluster (%s)" % (self.op.hypervisor,
8418
                                  ",".join(enabled_hvs)),
8419
                                 errors.ECODE_STATE)
8420

    
8421
    # Check tag validity
8422
    for tag in self.op.tags:
8423
      objects.TaggableObject.ValidateTag(tag)
8424

    
8425
    # check hypervisor parameter syntax (locally)
8426
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8427
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8428
                                      self.op.hvparams)
8429
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8430
    hv_type.CheckParameterSyntax(filled_hvp)
8431
    self.hv_full = filled_hvp
8432
    # check that we don't specify global parameters on an instance
8433
    _CheckGlobalHvParams(self.op.hvparams)
8434

    
8435
    # fill and remember the beparams dict
8436
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8437
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8438

    
8439
    # build os parameters
8440
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8441

    
8442
    # now that hvp/bep are in final format, let's reset to defaults,
8443
    # if told to do so
8444
    if self.op.identify_defaults:
8445
      self._RevertToDefaults(cluster)
8446

    
8447
    # NIC buildup
8448
    self.nics = []
8449
    for idx, nic in enumerate(self.op.nics):
8450
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8451
      nic_mode = nic_mode_req
8452
      if nic_mode is None:
8453
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8454

    
8455
      # in routed mode, for the first nic, the default ip is 'auto'
8456
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8457
        default_ip_mode = constants.VALUE_AUTO
8458
      else:
8459
        default_ip_mode = constants.VALUE_NONE
8460

    
8461
      # ip validity checks
8462
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8463
      if ip is None or ip.lower() == constants.VALUE_NONE:
8464
        nic_ip = None
8465
      elif ip.lower() == constants.VALUE_AUTO:
8466
        if not self.op.name_check:
8467
          raise errors.OpPrereqError("IP address set to auto but name checks"
8468
                                     " have been skipped",
8469
                                     errors.ECODE_INVAL)
8470
        nic_ip = self.hostname1.ip
8471
      else:
8472
        if not netutils.IPAddress.IsValid(ip):
8473
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8474
                                     errors.ECODE_INVAL)
8475
        nic_ip = ip
8476

    
8477
      # TODO: check the ip address for uniqueness
8478
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8479
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8480
                                   errors.ECODE_INVAL)
8481

    
8482
      # MAC address verification
8483
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8484
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8485
        mac = utils.NormalizeAndValidateMac(mac)
8486

    
8487
        try:
8488
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8489
        except errors.ReservationError:
8490
          raise errors.OpPrereqError("MAC address %s already in use"
8491
                                     " in cluster" % mac,
8492
                                     errors.ECODE_NOTUNIQUE)
8493

    
8494
      #  Build nic parameters
8495
      link = nic.get(constants.INIC_LINK, None)
8496
      nicparams = {}
8497
      if nic_mode_req:
8498
        nicparams[constants.NIC_MODE] = nic_mode_req
8499
      if link:
8500
        nicparams[constants.NIC_LINK] = link
8501

    
8502
      check_params = cluster.SimpleFillNIC(nicparams)
8503
      objects.NIC.CheckParameterSyntax(check_params)
8504
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8505

    
8506
    # disk checks/pre-build
8507
    default_vg = self.cfg.GetVGName()
8508
    self.disks = []
8509
    for disk in self.op.disks:
8510
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8511
      if mode not in constants.DISK_ACCESS_SET:
8512
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8513
                                   mode, errors.ECODE_INVAL)
8514
      size = disk.get(constants.IDISK_SIZE, None)
8515
      if size is None:
8516
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8517
      try:
8518
        size = int(size)
8519
      except (TypeError, ValueError):
8520
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8521
                                   errors.ECODE_INVAL)
8522

    
8523
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8524
      new_disk = {
8525
        constants.IDISK_SIZE: size,
8526
        constants.IDISK_MODE: mode,
8527
        constants.IDISK_VG: data_vg,
8528
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8529
        }
8530
      if constants.IDISK_ADOPT in disk:
8531
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8532
      self.disks.append(new_disk)
8533

    
8534
    if self.op.mode == constants.INSTANCE_IMPORT:
8535

    
8536
      # Check that the new instance doesn't have less disks than the export
8537
      instance_disks = len(self.disks)
8538
      export_disks = export_info.getint(constants.INISECT_INS, "disk_count")
8539
      if instance_disks < export_disks:
8540
        raise errors.OpPrereqError("Not enough disks to import."
8541
                                   " (instance: %d, export: %d)" %
8542
                                   (instance_disks, export_disks),
8543
                                   errors.ECODE_INVAL)
8544

    
8545
      disk_images = []
8546
      for idx in range(export_disks):
8547
        option = "disk%d_dump" % idx
8548
        if export_info.has_option(constants.INISECT_INS, option):
8549
          # FIXME: are the old os-es, disk sizes, etc. useful?
8550
          export_name = export_info.get(constants.INISECT_INS, option)
8551
          image = utils.PathJoin(self.op.src_path, export_name)
8552
          disk_images.append(image)
8553
        else:
8554
          disk_images.append(False)
8555

    
8556
      self.src_images = disk_images
8557

    
8558
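      # if the instance keeps the name it had in the export, reuse the
      # exported MAC addresses for NICs that were left on 'auto'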
      old_name = export_info.get(constants.INISECT_INS, "name")
8559
      try:
8560
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8561
      except (TypeError, ValueError), err:
8562
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8563
                                   " an integer: %s" % str(err),
8564
                                   errors.ECODE_STATE)
8565
      if self.op.instance_name == old_name:
8566
        for idx, nic in enumerate(self.nics):
8567
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx + 1:
8568
            nic_mac_ini = "nic%d_mac" % idx
8569
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8570

    
8571
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8572

    
8573
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8574
    if self.op.ip_check:
8575
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8576
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8577
                                   (self.check_ip, self.op.instance_name),
8578
                                   errors.ECODE_NOTUNIQUE)
8579

    
8580
    #### mac address generation
8581
    # By generating here the mac address both the allocator and the hooks get
8582
    # the real final mac address rather than the 'auto' or 'generate' value.
8583
    # There is a race condition between the generation and the instance object
8584
    # creation, which means that we know the mac is valid now, but we're not
8585
    # sure it will be when we actually add the instance. If things go bad
8586
    # adding the instance will abort because of a duplicate mac, and the
8587
    # creation job will fail.
8588
    for nic in self.nics:
8589
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8590
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8591

    
8592
    #### allocator run
8593

    
8594
    if self.op.iallocator is not None:
8595
      self._RunAllocator()
8596

    
8597
    #### node related checks
8598

    
8599
    # check primary node
8600
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8601
    assert self.pnode is not None, \
8602
      "Cannot retrieve locked node %s" % self.op.pnode
8603
    if pnode.offline:
8604
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8605
                                 pnode.name, errors.ECODE_STATE)
8606
    if pnode.drained:
8607
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8608
                                 pnode.name, errors.ECODE_STATE)
8609
    if not pnode.vm_capable:
8610
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8611
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8612

    
8613
    self.secondaries = []
8614

    
8615
    # mirror node verification
8616
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8617
      if self.op.snode == pnode.name:
8618
        raise errors.OpPrereqError("The secondary node cannot be the"
8619
                                   " primary node", errors.ECODE_INVAL)
8620
      _CheckNodeOnline(self, self.op.snode)
8621
      _CheckNodeNotDrained(self, self.op.snode)
8622
      _CheckNodeVmCapable(self, self.op.snode)
8623
      self.secondaries.append(self.op.snode)
8624

    
8625
    nodenames = [pnode.name] + self.secondaries
8626

    
8627
    if not self.adopt_disks:
8628
      # Check lv size requirements, if not adopting
8629
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8630
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8631

    
8632
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8633
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8634
                                disk[constants.IDISK_ADOPT])
8635
                     for disk in self.disks])
8636
      if len(all_lvs) != len(self.disks):
8637
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8638
                                   errors.ECODE_INVAL)
8639
      for lv_name in all_lvs:
8640
        try:
8641
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8642
          # to ReserveLV uses the same syntax
8643
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8644
        except errors.ReservationError:
8645
          raise errors.OpPrereqError("LV named %s used by another instance" %
8646
                                     lv_name, errors.ECODE_NOTUNIQUE)
8647

    
8648
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8649
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8650

    
8651
      node_lvs = self.rpc.call_lv_list([pnode.name],
8652
                                       vg_names.payload.keys())[pnode.name]
8653
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8654
      node_lvs = node_lvs.payload
8655

    
8656
      delta = all_lvs.difference(node_lvs.keys())
8657
      if delta:
8658
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8659
                                   utils.CommaJoin(delta),
8660
                                   errors.ECODE_INVAL)
8661
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8662
      if online_lvs:
8663
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8664
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8665
                                   errors.ECODE_STATE)
8666
      # update the size of disk based on what is found
8667
      for dsk in self.disks:
8668
        dsk[constants.IDISK_SIZE] = \
8669
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8670
                                        dsk[constants.IDISK_ADOPT])][0]))
8671

    
8672
    elif self.op.disk_template == constants.DT_BLOCK:
8673
      # Normalize and de-duplicate device paths
8674
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8675
                       for disk in self.disks])
8676
      if len(all_disks) != len(self.disks):
8677
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8678
                                   errors.ECODE_INVAL)
8679
      baddisks = [d for d in all_disks
8680
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8681
      if baddisks:
8682
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8683
                                   " cannot be adopted" %
8684
                                   (", ".join(baddisks),
8685
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8686
                                   errors.ECODE_INVAL)
8687

    
8688
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8689
                                            list(all_disks))[pnode.name]
8690
      node_disks.Raise("Cannot get block device information from node %s" %
8691
                       pnode.name)
8692
      node_disks = node_disks.payload
8693
      delta = all_disks.difference(node_disks.keys())
8694
      if delta:
8695
        raise errors.OpPrereqError("Missing block device(s): %s" %
8696
                                   utils.CommaJoin(delta),
8697
                                   errors.ECODE_INVAL)
8698
      for dsk in self.disks:
8699
        dsk[constants.IDISK_SIZE] = \
8700
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8701

    
8702
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8703

    
8704
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8705
    # check OS parameters (remotely)
8706
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8707

    
8708
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8709

    
8710
    # memory check on primary node
8711
    if self.op.start:
8712
      _CheckNodeFreeMemory(self, self.pnode.name,
8713
                           "creating instance %s" % self.op.instance_name,
8714
                           self.be_full[constants.BE_MEMORY],
8715
                           self.op.hypervisor)
8716

    
8717
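    # in dry-run mode, return the list of nodes the instance would use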
    self.dry_run_result = list(nodenames)
8718

    
8719
  def Exec(self, feedback_fn):
8720
    """Create and add the instance to the cluster.
8721

8722
    """
8723
    instance = self.op.instance_name
8724
    pnode_name = self.pnode.name
8725

    
8726
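    # hypervisors listed in HTS_REQ_PORT need a cluster-wide unique
    # network port (used e.g. for the VNC console)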
    ht_kind = self.op.hypervisor
8727
    if ht_kind in constants.HTS_REQ_PORT:
8728
      network_port = self.cfg.AllocatePort()
8729
    else:
8730
      network_port = None
8731

    
8732
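    # build the disk objects (logical ids, sizes, names) for the chosen
    # template; the devices themselves are created or adopted below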
    disks = _GenerateDiskTemplate(self,
8733
                                  self.op.disk_template,
8734
                                  instance, pnode_name,
8735
                                  self.secondaries,
8736
                                  self.disks,
8737
                                  self.instance_file_storage_dir,
8738
                                  self.op.file_driver,
8739
                                  0,
8740
                                  feedback_fn)
8741

    
8742
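    # the instance object is created with admin_up=False; it is only
    # marked up (and started) at the end if self.op.start is set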
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8743
                            primary_node=pnode_name,
8744
                            nics=self.nics, disks=disks,
8745
                            disk_template=self.op.disk_template,
8746
                            admin_up=False,
8747
                            network_port=network_port,
8748
                            beparams=self.op.beparams,
8749
                            hvparams=self.op.hvparams,
8750
                            hypervisor=self.op.hypervisor,
8751
                            osparams=self.op.osparams,
8752
                            )
8753

    
8754
    if self.op.tags:
8755
      for tag in self.op.tags:
8756
        iobj.AddTag(tag)
8757

    
8758
    if self.adopt_disks:
8759
      if self.op.disk_template == constants.DT_PLAIN:
8760
        # rename LVs to the newly-generated names; we need to construct
8761
        # 'fake' LV disks with the old data, plus the new unique_id
8762
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8763
        rename_to = []
8764
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8765
          rename_to.append(t_dsk.logical_id)
8766
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8767
          self.cfg.SetDiskID(t_dsk, pnode_name)
8768
        result = self.rpc.call_blockdev_rename(pnode_name,
8769
                                               zip(tmp_disks, rename_to))
8770
        result.Raise("Failed to rename adopted LVs")
8771
    else:
8772
      feedback_fn("* creating instance disks...")
8773
      try:
8774
        _CreateDisks(self, iobj)
8775
      except errors.OpExecError:
8776
        self.LogWarning("Device creation failed, reverting...")
8777
        try:
8778
          _RemoveDisks(self, iobj)
8779
        finally:
8780
          self.cfg.ReleaseDRBDMinors(instance)
8781
          raise
8782

    
8783
    feedback_fn("adding instance %s to cluster config" % instance)
8784

    
8785
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8786

    
8787
    # Declare that we don't want to remove the instance lock anymore, as we've
8788
    # added the instance to the config
8789
    del self.remove_locks[locking.LEVEL_INSTANCE]
8790

    
8791
    if self.op.mode == constants.INSTANCE_IMPORT:
8792
      # Release unused nodes
8793
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8794
    else:
8795
      # Release all nodes
8796
      _ReleaseLocks(self, locking.LEVEL_NODE)
8797

    
8798
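    # optionally wipe the newly created disks (never adopted ones),
    # depending on the cluster-wide prealloc_wipe_disks setting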
    disk_abort = False
8799
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8800
      feedback_fn("* wiping instance disks...")
8801
      try:
8802
        _WipeDisks(self, iobj)
8803
      except errors.OpExecError, err:
8804
        logging.exception("Wiping disks failed")
8805
        self.LogWarning("Wiping instance disks failed (%s)", err)
8806
        disk_abort = True
8807

    
8808
    if disk_abort:
8809
      # Something is already wrong with the disks, don't do anything else
8810
      pass
8811
    elif self.op.wait_for_sync:
8812
      disk_abort = not _WaitForSync(self, iobj)
8813
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8814
      # make sure the disks are not degraded (still sync-ing is ok)
8815
      time.sleep(15)
8816
      feedback_fn("* checking mirrors status")
8817
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8818
    else:
8819
      disk_abort = False
8820

    
8821
    if disk_abort:
8822
      _RemoveDisks(self, iobj)
8823
      self.cfg.RemoveInstance(iobj.name)
8824
      # Make sure the instance lock gets removed
8825
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8826
      raise errors.OpExecError("There are some degraded disks for"
8827
                               " this instance")
8828

    
8829
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8830
      if self.op.mode == constants.INSTANCE_CREATE:
8831
        if not self.op.no_install:
8832
          feedback_fn("* running the instance OS create scripts...")
8833
          # FIXME: pass debug option from opcode to backend
8834
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8835
                                                 self.op.debug_level)
8836
          result.Raise("Could not add os for instance %s"
8837
                       " on node %s" % (instance, pnode_name))
8838

    
8839
      elif self.op.mode == constants.INSTANCE_IMPORT:
8840
        feedback_fn("* running the instance OS import scripts...")
8841

    
8842
        transfers = []
8843

    
8844
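        # one DiskTransfer per dump file found in the export; disks
        # without a dump were recorded as False and are skipped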
        for idx, image in enumerate(self.src_images):
8845
          if not image:
8846
            continue
8847

    
8848
          # FIXME: pass debug option from opcode to backend
8849
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8850
                                             constants.IEIO_FILE, (image, ),
8851
                                             constants.IEIO_SCRIPT,
8852
                                             (iobj.disks[idx], idx),
8853
                                             None)
8854
          transfers.append(dt)
8855

    
8856
        import_result = \
8857
          masterd.instance.TransferInstanceData(self, feedback_fn,
8858
                                                self.op.src_node, pnode_name,
8859
                                                self.pnode.secondary_ip,
8860
                                                iobj, transfers)
8861
        if not compat.all(import_result):
8862
          self.LogWarning("Some disks for instance %s on node %s were not"
8863
                          " imported successfully" % (instance, pnode_name))
8864

    
8865
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8866
        feedback_fn("* preparing remote import...")
8867
        # The source cluster will stop the instance before attempting to make a
8868
        # connection. In some cases stopping an instance can take a long time,
8869
        # hence the shutdown timeout is added to the connection timeout.
8870
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8871
                           self.op.source_shutdown_timeout)
8872
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8873

    
8874
        assert iobj.primary_node == self.pnode.name
8875
        disk_results = \
8876
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8877
                                        self.source_x509_ca,
8878
                                        self._cds, timeouts)
8879
        if not compat.all(disk_results):
8880
          # TODO: Should the instance still be started, even if some disks
8881
          # failed to import (valid for local imports, too)?
8882
          self.LogWarning("Some disks for instance %s on node %s were not"
8883
                          " imported successfully" % (instance, pnode_name))
8884

    
8885
        # Run rename script on newly imported instance
8886
        assert iobj.name == instance
8887
        feedback_fn("Running rename script for %s" % instance)
8888
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8889
                                                   self.source_instance_name,
8890
                                                   self.op.debug_level)
8891
        if result.fail_msg:
8892
          self.LogWarning("Failed to run rename script for %s on node"
8893
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
8894

    
8895
      else:
8896
        # also checked in the prereq part
8897
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8898
                                     % self.op.mode)
8899

    
8900
    if self.op.start:
8901
      iobj.admin_up = True
8902
      self.cfg.Update(iobj, feedback_fn)
8903
      logging.info("Starting instance %s on node %s", instance, pnode_name)
8904
      feedback_fn("* starting instance...")
8905
      result = self.rpc.call_instance_start(pnode_name, iobj,
8906
                                            None, None, False)
8907
      result.Raise("Could not start instance")
8908

    
8909
    return list(iobj.all_nodes)
8910

    
8911

    
8912
class LUInstanceConsole(NoHooksLU):
8913
  """Connect to an instance's console.
8914

8915
  This is somewhat special in that it returns the command line that
8916
  you need to run on the master node in order to connect to the
8917
  console.
8918

8919
  """
8920
  REQ_BGL = False
8921

    
8922
  def ExpandNames(self):
8923
    self._ExpandAndLockInstance()
8924

    
8925
  def CheckPrereq(self):
8926
    """Check prerequisites.
8927

8928
    This checks that the instance is in the cluster.
8929

8930
    """
8931
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8932
    assert self.instance is not None, \
8933
      "Cannot retrieve locked instance %s" % self.op.instance_name
8934
    _CheckNodeOnline(self, self.instance.primary_node)
8935

    
8936
  def Exec(self, feedback_fn):
8937
    """Connect to the console of an instance
8938

8939
    """
8940
    instance = self.instance
8941
    node = instance.primary_node
8942

    
8943
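    # ask the primary node's hypervisor which instances are running there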
    node_insts = self.rpc.call_instance_list([node],
8944
                                             [instance.hypervisor])[node]
8945
    node_insts.Raise("Can't get node information from %s" % node)
8946

    
8947
    if instance.name not in node_insts.payload:
8948
      if instance.admin_up:
8949
        state = constants.INSTST_ERRORDOWN
8950
      else:
8951
        state = constants.INSTST_ADMINDOWN
8952
      raise errors.OpExecError("Instance %s is not running (state %s)" %
8953
                               (instance.name, state))
8954

    
8955
    logging.debug("Connecting to console of %s on %s", instance.name, node)
8956

    
8957
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8958

    
8959

    
8960
def _GetInstanceConsole(cluster, instance):
8961
  """Returns console information for an instance.
8962

8963
  @type cluster: L{objects.Cluster}
8964
  @type instance: L{objects.Instance}
8965
  @rtype: dict
8966

8967
  """
8968
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
8969
  # beparams and hvparams are passed separately, to avoid editing the
8970
  # instance and then saving the defaults in the instance itself.
8971
  hvparams = cluster.FillHV(instance)
8972
  beparams = cluster.FillBE(instance)
8973
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8974

    
8975
  assert console.instance == instance.name
8976
  assert console.Validate()
8977

    
8978
  return console.ToDict()
8979

    
8980

    
8981
class LUInstanceReplaceDisks(LogicalUnit):
8982
  """Replace the disks of an instance.
8983

8984
  """
8985
  HPATH = "mirrors-replace"
8986
  HTYPE = constants.HTYPE_INSTANCE
8987
  REQ_BGL = False
8988

    
8989
  def CheckArguments(self):
8990
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8991
                                  self.op.iallocator)
8992

    
8993
  def ExpandNames(self):
8994
    self._ExpandAndLockInstance()
8995

    
8996
    assert locking.LEVEL_NODE not in self.needed_locks
8997
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
8998

    
8999
    assert self.op.iallocator is None or self.op.remote_node is None, \
9000
      "Conflicting options"
9001

    
9002
    if self.op.remote_node is not None:
9003
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9004

    
9005
      # Warning: do not remove the locking of the new secondary here
9006
      # unless DRBD8.AddChildren is changed to work in parallel;
9007
      # currently it doesn't since parallel invocations of
9008
      # FindUnusedMinor will conflict
9009
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9010
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9011
    else:
9012
      self.needed_locks[locking.LEVEL_NODE] = []
9013
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9014

    
9015
      if self.op.iallocator is not None:
9016
        # iallocator will select a new node in the same group
9017
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9018

    
9019
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9020
                                   self.op.iallocator, self.op.remote_node,
9021
                                   self.op.disks, False, self.op.early_release)
9022

    
9023
    self.tasklets = [self.replacer]
9024

    
9025
  def DeclareLocks(self, level):
9026
    if level == locking.LEVEL_NODEGROUP:
9027
      assert self.op.remote_node is None
9028
      assert self.op.iallocator is not None
9029
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9030

    
9031
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9032
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9033
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9034

    
9035
    elif level == locking.LEVEL_NODE:
9036
      if self.op.iallocator is not None:
9037
        assert self.op.remote_node is None
9038
        assert not self.needed_locks[locking.LEVEL_NODE]
9039

    
9040
        # Lock member nodes of all locked groups
9041
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9042
          for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
9043
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9044
      else:
9045
        self._LockInstancesNodes()
9046

    
9047
  def BuildHooksEnv(self):
9048
    """Build hooks env.
9049

9050
    This runs on the master, the primary and all the secondaries.
9051

9052
    """
9053
    instance = self.replacer.instance
9054
    env = {
9055
      "MODE": self.op.mode,
9056
      "NEW_SECONDARY": self.op.remote_node,
9057
      "OLD_SECONDARY": instance.secondary_nodes[0],
9058
      }
9059
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9060
    return env
9061

    
9062
  def BuildHooksNodes(self):
9063
    """Build hooks nodes.
9064

9065
    """
9066
    instance = self.replacer.instance
9067
    nl = [
9068
      self.cfg.GetMasterNode(),
9069
      instance.primary_node,
9070
      ]
9071
    if self.op.remote_node is not None:
9072
      nl.append(self.op.remote_node)
9073
    return nl, nl
9074

    
9075
  def CheckPrereq(self):
9076
    """Check prerequisites.
9077

9078
    """
9079
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9080
            self.op.iallocator is None)
9081

    
9082
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
9083
    if owned_groups:
9084
      groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9085
      if owned_groups != groups:
9086
        raise errors.OpExecError("Node groups used by instance '%s' changed"
9087
                                 " since lock was acquired, current list is %r,"
9088
                                 " used to be '%s'" %
9089
                                 (self.op.instance_name,
9090
                                  utils.CommaJoin(groups),
9091
                                  utils.CommaJoin(owned_groups)))
9092

    
9093
    return LogicalUnit.CheckPrereq(self)
9094

    
9095

    
9096
class TLReplaceDisks(Tasklet):
9097
  """Replaces disks for an instance.
9098

9099
  Note: Locking is not within the scope of this class.
9100

9101
  """
9102
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9103
               disks, delay_iallocator, early_release):
9104
    """Initializes this class.
9105

9106
    """
9107
    Tasklet.__init__(self, lu)
9108

    
9109
    # Parameters
9110
    self.instance_name = instance_name
9111
    self.mode = mode
9112
    self.iallocator_name = iallocator_name
9113
    self.remote_node = remote_node
9114
    self.disks = disks
9115
    self.delay_iallocator = delay_iallocator
9116
    self.early_release = early_release
9117

    
9118
    # Runtime data
9119
    self.instance = None
9120
    self.new_node = None
9121
    self.target_node = None
9122
    self.other_node = None
9123
    self.remote_node_info = None
9124
    self.node_secondary_ip = None
9125

    
9126
  @staticmethod
9127
  def CheckArguments(mode, remote_node, iallocator):
9128
    """Helper function for users of this class.
9129

9130
    """
9131
    # check for valid parameter combination
9132
    if mode == constants.REPLACE_DISK_CHG:
9133
      if remote_node is None and iallocator is None:
9134
        raise errors.OpPrereqError("When changing the secondary either an"
9135
                                   " iallocator script must be used or the"
9136
                                   " new node given", errors.ECODE_INVAL)
9137

    
9138
      if remote_node is not None and iallocator is not None:
9139
        raise errors.OpPrereqError("Give either the iallocator or the new"
9140
                                   " secondary, not both", errors.ECODE_INVAL)
9141

    
9142
    elif remote_node is not None or iallocator is not None:
9143
      # Not replacing the secondary
9144
      raise errors.OpPrereqError("The iallocator and new node options can"
9145
                                 " only be used when changing the"
9146
                                 " secondary node", errors.ECODE_INVAL)
9147

    
9148
  @staticmethod
9149
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9150
    """Compute a new secondary node using an IAllocator.
9151

9152
    """
9153
    ial = IAllocator(lu.cfg, lu.rpc,
9154
                     mode=constants.IALLOCATOR_MODE_RELOC,
9155
                     name=instance_name,
9156
                     relocate_from=relocate_from)
9157

    
9158
    ial.Run(iallocator_name)
9159

    
9160
    if not ial.success:
9161
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9162
                                 " %s" % (iallocator_name, ial.info),
9163
                                 errors.ECODE_NORES)
9164

    
9165
    if len(ial.result) != ial.required_nodes:
9166
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9167
                                 " of nodes (%s), required %s" %
9168
                                 (iallocator_name,
9169
                                  len(ial.result), ial.required_nodes),
9170
                                 errors.ECODE_FAULT)
9171

    
9172
    remote_node_name = ial.result[0]
9173

    
9174
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9175
               instance_name, remote_node_name)
9176

    
9177
    return remote_node_name
9178

    
9179
  def _FindFaultyDisks(self, node_name):
9180
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9181
                                    node_name, True)
9182

    
9183
  def _CheckDisksActivated(self, instance):
9184
    """Checks if the instance disks are activated.
9185

9186
    @param instance: The instance to check disks
9187
    @return: True if they are activated, False otherwise
9188

9189
    """
9190
    nodes = instance.all_nodes
9191

    
9192
    for idx, dev in enumerate(instance.disks):
9193
      for node in nodes:
9194
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9195
        self.cfg.SetDiskID(dev, node)
9196

    
9197
        result = self.rpc.call_blockdev_find(node, dev)
9198

    
9199
        if result.offline:
9200
          continue
9201
        elif result.fail_msg or not result.payload:
9202
          return False
9203

    
9204
    return True
9205

    
9206
  def CheckPrereq(self):
9207
    """Check prerequisites.
9208

9209
    This checks that the instance is in the cluster.
9210

9211
    """
9212
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9213
    assert instance is not None, \
9214
      "Cannot retrieve locked instance %s" % self.instance_name
9215

    
9216
    if instance.disk_template != constants.DT_DRBD8:
9217
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9218
                                 " instances", errors.ECODE_INVAL)
9219

    
9220
    if len(instance.secondary_nodes) != 1:
9221
      raise errors.OpPrereqError("The instance has a strange layout,"
9222
                                 " expected one secondary but found %d" %
9223
                                 len(instance.secondary_nodes),
9224
                                 errors.ECODE_FAULT)
9225

    
9226
    if not self.delay_iallocator:
9227
      self._CheckPrereq2()
9228

    
9229
  def _CheckPrereq2(self):
9230
    """Check prerequisites, second part.
9231

9232
    This function should conceptually be part of CheckPrereq. It was
9233
    separated and is now called from Exec because, during node evacuation,
9234
    the iallocator would otherwise only see an unmodified cluster model,
9235
    not taking planned changes into account.
9236

9237
    """
9238
    instance = self.instance
9239
    secondary_node = instance.secondary_nodes[0]
9240

    
9241
    if self.iallocator_name is None:
9242
      remote_node = self.remote_node
9243
    else:
9244
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9245
                                       instance.name, instance.secondary_nodes)
9246

    
9247
    if remote_node is None:
9248
      self.remote_node_info = None
9249
    else:
9250
      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
9251
             "Remote node '%s' is not locked" % remote_node
9252

    
9253
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9254
      assert self.remote_node_info is not None, \
9255
        "Cannot retrieve locked node %s" % remote_node
9256

    
9257
    if remote_node == self.instance.primary_node:
9258
      raise errors.OpPrereqError("The specified node is the primary node of"
9259
                                 " the instance", errors.ECODE_INVAL)
9260

    
9261
    if remote_node == secondary_node:
9262
      raise errors.OpPrereqError("The specified node is already the"
9263
                                 " secondary node of the instance",
9264
                                 errors.ECODE_INVAL)
9265

    
9266
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9267
                                    constants.REPLACE_DISK_CHG):
9268
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9269
                                 errors.ECODE_INVAL)
9270

    
9271
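    # in automatic mode, find out which side (primary or secondary) has
    # faulty disks and replace only those; both sides faulty is an error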
    if self.mode == constants.REPLACE_DISK_AUTO:
9272
      if not self._CheckDisksActivated(instance):
9273
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9274
                                   " first" % self.instance_name,
9275
                                   errors.ECODE_STATE)
9276
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9277
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9278

    
9279
      if faulty_primary and faulty_secondary:
9280
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9281
                                   " one node and can not be repaired"
9282
                                   " automatically" % self.instance_name,
9283
                                   errors.ECODE_STATE)
9284

    
9285
      if faulty_primary:
9286
        self.disks = faulty_primary
9287
        self.target_node = instance.primary_node
9288
        self.other_node = secondary_node
9289
        check_nodes = [self.target_node, self.other_node]
9290
      elif faulty_secondary:
9291
        self.disks = faulty_secondary
9292
        self.target_node = secondary_node
9293
        self.other_node = instance.primary_node
9294
        check_nodes = [self.target_node, self.other_node]
9295
      else:
9296
        self.disks = []
9297
        check_nodes = []
9298

    
9299
    else:
9300
      # Non-automatic modes
9301
      if self.mode == constants.REPLACE_DISK_PRI:
9302
        self.target_node = instance.primary_node
9303
        self.other_node = secondary_node
9304
        check_nodes = [self.target_node, self.other_node]
9305

    
9306
      elif self.mode == constants.REPLACE_DISK_SEC:
9307
        self.target_node = secondary_node
9308
        self.other_node = instance.primary_node
9309
        check_nodes = [self.target_node, self.other_node]
9310

    
9311
      elif self.mode == constants.REPLACE_DISK_CHG:
9312
        self.new_node = remote_node
9313
        self.other_node = instance.primary_node
9314
        self.target_node = secondary_node
9315
        check_nodes = [self.new_node, self.other_node]
9316

    
9317
        _CheckNodeNotDrained(self.lu, remote_node)
9318
        _CheckNodeVmCapable(self.lu, remote_node)
9319

    
9320
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9321
        assert old_node_info is not None
9322
        if old_node_info.offline and not self.early_release:
9323
          # doesn't make sense to delay the release
9324
          self.early_release = True
9325
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9326
                          " early-release mode", secondary_node)
9327

    
9328
      else:
9329
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9330
                                     self.mode)
9331

    
9332
      # If not specified all disks should be replaced
9333
      if not self.disks:
9334
        self.disks = range(len(self.instance.disks))
9335

    
9336
    for node in check_nodes:
9337
      _CheckNodeOnline(self.lu, node)
9338

    
9339
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9340
                                                          self.other_node,
9341
                                                          self.target_node]
9342
                              if node_name is not None)
9343

    
9344
    # Release unneeded node locks
9345
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9346

    
9347
    # Release any owned node group
9348
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9349
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9350

    
9351
    # Check whether disks are valid
9352
    for disk_idx in self.disks:
9353
      instance.FindDisk(disk_idx)
9354

    
9355
    # Get secondary node IP addresses
9356
    self.node_secondary_ip = \
9357
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
9358
           for node_name in touched_nodes)
9359

    
9360
  def Exec(self, feedback_fn):
9361
    """Execute disk replacement.
9362

9363
    This dispatches the disk replacement to the appropriate handler.
9364

9365
    """
9366
    if self.delay_iallocator:
9367
      self._CheckPrereq2()
9368

    
9369
    if __debug__:
9370
      # Verify owned locks before starting operation
9371
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9372
      assert set(owned_locks) == set(self.node_secondary_ip), \
9373
          ("Incorrect node locks, owning %s, expected %s" %
9374
           (owned_locks, self.node_secondary_ip.keys()))
9375

    
9376
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9377
      assert list(owned_locks) == [self.instance_name], \
9378
          "Instance '%s' not locked" % self.instance_name
9379

    
9380
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9381
          "Should not own any node group lock at this point"
9382

    
9383
    if not self.disks:
9384
      feedback_fn("No disks need replacement")
9385
      return
9386

    
9387
    feedback_fn("Replacing disk(s) %s for %s" %
9388
                (utils.CommaJoin(self.disks), self.instance.name))
9389

    
9390
    activate_disks = (not self.instance.admin_up)
9391

    
9392
    # Activate the instance disks if we're replacing them on a down instance
9393
    if activate_disks:
9394
      _StartInstanceDisks(self.lu, self.instance, True)
9395

    
9396
    try:
9397
      # Should we replace the secondary node?
9398
      if self.new_node is not None:
9399
        fn = self._ExecDrbd8Secondary
9400
      else:
9401
        fn = self._ExecDrbd8DiskOnly
9402

    
9403
      result = fn(feedback_fn)
9404
    finally:
9405
      # Deactivate the instance disks if we're replacing them on a
9406
      # down instance
9407
      if activate_disks:
9408
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9409

    
9410
    if __debug__:
9411
      # Verify owned locks
9412
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9413
      nodes = frozenset(self.node_secondary_ip)
9414
      assert ((self.early_release and not owned_locks) or
9415
              (not self.early_release and not (set(owned_locks) - nodes))), \
9416
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9417
         " nodes=%r" % (self.early_release, owned_locks, nodes))
9418

    
9419
    return result
9420

    
9421
  def _CheckVolumeGroup(self, nodes):
9422
    self.lu.LogInfo("Checking volume groups")
9423

    
9424
    vgname = self.cfg.GetVGName()
9425

    
9426
    # Make sure volume group exists on all involved nodes
9427
    results = self.rpc.call_vg_list(nodes)
9428
    if not results:
9429
      raise errors.OpExecError("Can't list volume groups on the nodes")
9430

    
9431
    for node in nodes:
9432
      res = results[node]
9433
      res.Raise("Error checking node %s" % node)
9434
      if vgname not in res.payload:
9435
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9436
                                 (vgname, node))
9437

    
9438
  def _CheckDisksExistence(self, nodes):
9439
    # Check disk existence
9440
    for idx, dev in enumerate(self.instance.disks):
9441
      if idx not in self.disks:
9442
        continue
9443

    
9444
      for node in nodes:
9445
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9446
        self.cfg.SetDiskID(dev, node)
9447

    
9448
        result = self.rpc.call_blockdev_find(node, dev)
9449

    
9450
        msg = result.fail_msg
9451
        if msg or not result.payload:
9452
          if not msg:
9453
            msg = "disk not found"
9454
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9455
                                   (idx, node, msg))
9456

    
9457
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9458
    for idx, dev in enumerate(self.instance.disks):
9459
      if idx not in self.disks:
9460
        continue
9461

    
9462
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9463
                      (idx, node_name))
9464

    
9465
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9466
                                   ldisk=ldisk):
9467
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9468
                                 " replace disks for instance %s" %
9469
                                 (node_name, self.instance.name))
9470

    
9471
  def _CreateNewStorage(self, node_name):
9472
    """Create new storage on the primary or secondary node.
9473

9474
    This is only used for same-node replaces, not for changing the
9475
    secondary node, hence we don't want to modify the existing disk.
9476

9477
    """
9478
    iv_names = {}
9479

    
9480
    for idx, dev in enumerate(self.instance.disks):
9481
      if idx not in self.disks:
9482
        continue
9483

    
9484
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9485

    
9486
      self.cfg.SetDiskID(dev, node_name)
9487

    
9488
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9489
      names = _GenerateUniqueNames(self.lu, lv_names)
9490

    
9491
      vg_data = dev.children[0].logical_id[0]
9492
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9493
                             logical_id=(vg_data, names[0]))
9494
      vg_meta = dev.children[1].logical_id[0]
9495
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9496
                             logical_id=(vg_meta, names[1]))
9497

    
9498
      new_lvs = [lv_data, lv_meta]
9499
      old_lvs = [child.Copy() for child in dev.children]
9500
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9501

    
9502
      # we pass force_create=True to force the LVM creation
9503
      for new_lv in new_lvs:
9504
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9505
                        _GetInstanceInfoText(self.instance), False)
9506

    
9507
    return iv_names
9508

    
9509
  def _CheckDevices(self, node_name, iv_names):
9510
    for name, (dev, _, _) in iv_names.iteritems():
9511
      self.cfg.SetDiskID(dev, node_name)
9512

    
9513
      result = self.rpc.call_blockdev_find(node_name, dev)
9514

    
9515
      msg = result.fail_msg
9516
      if msg or not result.payload:
9517
        if not msg:
9518
          msg = "disk not found"
9519
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9520
                                 (name, msg))
9521

    
9522
      if result.payload.is_degraded:
9523
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9524

    
9525
  def _RemoveOldStorage(self, node_name, iv_names):
9526
    for name, (_, old_lvs, _) in iv_names.iteritems():
9527
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9528

    
9529
      for lv in old_lvs:
9530
        self.cfg.SetDiskID(lv, node_name)
9531

    
9532
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9533
        if msg:
9534
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9535
                             hint="remove unused LVs manually")
9536

    
9537
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9538
    """Replace a disk on the primary or secondary for DRBD 8.
9539

9540
    The algorithm for replace is quite complicated:
9541

9542
      1. for each disk to be replaced:
9543

9544
        1. create new LVs on the target node with unique names
9545
        1. detach old LVs from the drbd device
9546
        1. rename old LVs to name_replaced.<time_t>
9547
        1. rename new LVs to old LVs
9548
        1. attach the new LVs (with the old names now) to the drbd device
9549

9550
      1. wait for sync across all devices
9551

9552
      1. for each modified disk:
9553

9554
        1. remove old LVs (which have the name name_replaced.<time_t>)
9555

9556
    Failures are not very well handled.
9557

9558
    """
9559
    steps_total = 6
9560

    
9561
    # Step: check device activation
9562
    self.lu.LogStep(1, steps_total, "Check device existence")
9563
    self._CheckDisksExistence([self.other_node, self.target_node])
9564
    self._CheckVolumeGroup([self.target_node, self.other_node])
9565

    
9566
    # Step: check other node consistency
9567
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9568
    self._CheckDisksConsistency(self.other_node,
9569
                                self.other_node == self.instance.primary_node,
9570
                                False)
9571

    
9572
    # Step: create new storage
9573
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9574
    iv_names = self._CreateNewStorage(self.target_node)
9575

    
9576
    # Step: for each lv, detach+rename*2+attach
9577
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9578
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9579
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9580

    
9581
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9582
                                                     old_lvs)
9583
      result.Raise("Can't detach drbd from local storage on node"
9584
                   " %s for device %s" % (self.target_node, dev.iv_name))
9585
      #dev.children = []
9586
      #cfg.Update(instance)
9587

    
9588
      # ok, we created the new LVs, so now we know we have the needed
9589
      # storage; as such, we proceed on the target node to rename
9590
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9591
      # using the assumption that logical_id == physical_id (which in
9592
      # turn is the unique_id on that node)
9593

    
9594
      # FIXME(iustin): use a better name for the replaced LVs
9595
      temp_suffix = int(time.time())
9596
      ren_fn = lambda d, suff: (d.physical_id[0],
9597
                                d.physical_id[1] + "_replaced-%s" % suff)
9598

    
9599
      # Build the rename list based on what LVs exist on the node
9600
      rename_old_to_new = []
9601
      for to_ren in old_lvs:
9602
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9603
        if not result.fail_msg and result.payload:
9604
          # device exists
9605
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9606

    
9607
      self.lu.LogInfo("Renaming the old LVs on the target node")
9608
      result = self.rpc.call_blockdev_rename(self.target_node,
9609
                                             rename_old_to_new)
9610
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9611

    
9612
      # Now we rename the new LVs to the old LVs
9613
      self.lu.LogInfo("Renaming the new LVs on the target node")
9614
      rename_new_to_old = [(new, old.physical_id)
9615
                           for old, new in zip(old_lvs, new_lvs)]
9616
      result = self.rpc.call_blockdev_rename(self.target_node,
9617
                                             rename_new_to_old)
9618
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9619

    
9620
      # Intermediate steps of in memory modifications
9621
      for old, new in zip(old_lvs, new_lvs):
9622
        new.logical_id = old.logical_id
9623
        self.cfg.SetDiskID(new, self.target_node)
9624

    
9625
      # We need to modify old_lvs so that removal later removes the
9626
      # right LVs, not the newly added ones; note that old_lvs is a
9627
      # copy here
9628
      for disk in old_lvs:
9629
        disk.logical_id = ren_fn(disk, temp_suffix)
9630
        self.cfg.SetDiskID(disk, self.target_node)
9631

    
9632
      # Now that the new lvs have the old name, we can add them to the device
9633
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9634
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9635
                                                  new_lvs)
9636
      msg = result.fail_msg
9637
      if msg:
9638
        for new_lv in new_lvs:
9639
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9640
                                               new_lv).fail_msg
9641
          if msg2:
9642
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9643
                               hint=("cleanup manually the unused logical"
9644
                                     "volumes"))
9645
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9646

    
9647
    cstep = 5
9648
    if self.early_release:
9649
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9650
      cstep += 1
9651
      self._RemoveOldStorage(self.target_node, iv_names)
9652
      # WARNING: we release both node locks here, do not do other RPCs
9653
      # than WaitForSync to the primary node
9654
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9655
                    names=[self.target_node, self.other_node])
9656

    
9657
    # Wait for sync
9658
    # This can fail as the old devices are degraded and _WaitForSync
9659
    # does a combined result over all disks, so we don't check its return value
9660
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9661
    cstep += 1
9662
    _WaitForSync(self.lu, self.instance)
9663

    
9664
    # Check all devices manually
9665
    self._CheckDevices(self.instance.primary_node, iv_names)
9666

    
9667
    # Step: remove old storage
9668
    if not self.early_release:
9669
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9670
      cstep += 1
9671
      self._RemoveOldStorage(self.target_node, iv_names)
9672

    
9673
  def _ExecDrbd8Secondary(self, feedback_fn):
9674
    """Replace the secondary node for DRBD 8.
9675

9676
    The algorithm for replace is quite complicated:
9677
      - for all disks of the instance:
9678
        - create new LVs on the new node with same names
9679
        - shutdown the drbd device on the old secondary
9680
        - disconnect the drbd network on the primary
9681
        - create the drbd device on the new secondary
9682
        - network attach the drbd on the primary, using an artifice:
9683
          the drbd code for Attach() will connect to the network if it
9684
          finds a device which is connected to the good local disks but
9685
          not network enabled
9686
      - wait for sync across all devices
9687
      - remove all disks from the old secondary
9688

9689
    Failures are not very well handled.
9690

9691
    """
9692
    steps_total = 6
9693

    
9694
    # Step: check device activation
9695
    self.lu.LogStep(1, steps_total, "Check device existence")
9696
    self._CheckDisksExistence([self.instance.primary_node])
9697
    self._CheckVolumeGroup([self.instance.primary_node])
9698

    
9699
    # Step: check other node consistency
9700
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9701
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9702

    
9703
    # Step: create new storage
9704
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9705
    for idx, dev in enumerate(self.instance.disks):
9706
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9707
                      (self.new_node, idx))
9708
      # we pass force_create=True to force LVM creation
9709
      for new_lv in dev.children:
9710
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9711
                        _GetInstanceInfoText(self.instance), False)
9712

    
9713
    # Step 4: dbrd minors and drbd setups changes
9714
    # after this, we must manually remove the drbd minors on both the
9715
    # error and the success paths
9716
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9717
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9718
                                         for dev in self.instance.disks],
9719
                                        self.instance.name)
9720
    logging.debug("Allocated minors %r", minors)
9721

    
9722
    iv_names = {}
9723
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9724
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9725
                      (self.new_node, idx))
9726
      # create new devices on new_node; note that we create two IDs:
9727
      # one without port, so the drbd will be activated without
9728
      # networking information on the new node at this stage, and one
9729
      # with network, for the latter activation in step 4
9730
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9731
      if self.instance.primary_node == o_node1:
9732
        p_minor = o_minor1
9733
      else:
9734
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9735
        p_minor = o_minor2
9736

    
9737
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9738
                      p_minor, new_minor, o_secret)
9739
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9740
                    p_minor, new_minor, o_secret)
9741

    
9742
      iv_names[idx] = (dev, dev.children, new_net_id)
9743
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9744
                    new_net_id)
9745
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9746
                              logical_id=new_alone_id,
9747
                              children=dev.children,
9748
                              size=dev.size)
9749
      try:
9750
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9751
                              _GetInstanceInfoText(self.instance), False)
9752
      except errors.GenericError:
9753
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9754
        raise
9755

    
9756
    # We have new devices, shutdown the drbd on the old secondary
9757
    for idx, dev in enumerate(self.instance.disks):
9758
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9759
      self.cfg.SetDiskID(dev, self.target_node)
9760
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9761
      if msg:
9762
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9763
                           "node: %s" % (idx, msg),
9764
                           hint=("Please cleanup this device manually as"
9765
                                 " soon as possible"))
9766

    
9767
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9768
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9769
                                               self.node_secondary_ip,
9770
                                               self.instance.disks)\
9771
                                              [self.instance.primary_node]
9772

    
9773
    msg = result.fail_msg
9774
    if msg:
9775
      # detaches didn't succeed (unlikely)
9776
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9777
      raise errors.OpExecError("Can't detach the disks from the network on"
9778
                               " old node: %s" % (msg,))
9779

    
9780
    # if we managed to detach at least one, we update all the disks of
9781
    # the instance to point to the new secondary
9782
    self.lu.LogInfo("Updating instance configuration")
9783
    for dev, _, new_logical_id in iv_names.itervalues():
9784
      dev.logical_id = new_logical_id
9785
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9786

    
9787
    self.cfg.Update(self.instance, feedback_fn)
9788

    
9789
    # and now perform the drbd attach
9790
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9791
                    " (standalone => connected)")
9792
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9793
                                            self.new_node],
9794
                                           self.node_secondary_ip,
9795
                                           self.instance.disks,
9796
                                           self.instance.name,
9797
                                           False)
9798
    for to_node, to_result in result.items():
9799
      msg = to_result.fail_msg
9800
      if msg:
9801
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9802
                           to_node, msg,
9803
                           hint=("please do a gnt-instance info to see the"
9804
                                 " status of disks"))
9805
    cstep = 5
9806
    if self.early_release:
9807
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9808
      cstep += 1
9809
      self._RemoveOldStorage(self.target_node, iv_names)
9810
      # WARNING: we release all node locks here, do not do other RPCs
9811
      # than WaitForSync to the primary node
9812
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9813
                    names=[self.instance.primary_node,
9814
                           self.target_node,
9815
                           self.new_node])
9816

    
9817
    # Wait for sync
9818
    # This can fail as the old devices are degraded and _WaitForSync
9819
    # does a combined result over all disks, so we don't check its return value
9820
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9821
    cstep += 1
9822
    _WaitForSync(self.lu, self.instance)
9823

    
9824
    # Check all devices manually
9825
    self._CheckDevices(self.instance.primary_node, iv_names)
9826

    
9827
    # Step: remove old storage
9828
    if not self.early_release:
9829
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9830
      self._RemoveOldStorage(self.target_node, iv_names)
9831

    
9832

    
9833
class LURepairNodeStorage(NoHooksLU):
9834
  """Repairs the volume group on a node.
9835

9836
  """
9837
  REQ_BGL = False
9838

    
9839
  def CheckArguments(self):
9840
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9841

    
9842
    storage_type = self.op.storage_type
9843

    
9844
    if (constants.SO_FIX_CONSISTENCY not in
9845
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9846
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
9847
                                 " repaired" % storage_type,
9848
                                 errors.ECODE_INVAL)
9849

    
9850
  def ExpandNames(self):
9851
    self.needed_locks = {
9852
      locking.LEVEL_NODE: [self.op.node_name],
9853
      }
9854

    
9855
  def _CheckFaultyDisks(self, instance, node_name):
9856
    """Ensure faulty disks abort the opcode or at least warn."""
9857
    try:
9858
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9859
                                  node_name, True):
9860
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9861
                                   " node '%s'" % (instance.name, node_name),
9862
                                   errors.ECODE_STATE)
9863
    except errors.OpPrereqError, err:
9864
      if self.op.ignore_consistency:
9865
        self.proc.LogWarning(str(err.args[0]))
9866
      else:
9867
        raise
9868

    
9869
  def CheckPrereq(self):
9870
    """Check prerequisites.
9871

9872
    """
9873
    # Check whether any instance on this node has faulty disks
9874
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9875
      if not inst.admin_up:
9876
        continue
9877
      check_nodes = set(inst.all_nodes)
9878
      check_nodes.discard(self.op.node_name)
9879
      for inst_node_name in check_nodes:
9880
        self._CheckFaultyDisks(inst, inst_node_name)
9881

    
9882
  def Exec(self, feedback_fn):
9883
    feedback_fn("Repairing storage unit '%s' on %s ..." %
9884
                (self.op.name, self.op.node_name))
9885

    
9886
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9887
    result = self.rpc.call_storage_execute(self.op.node_name,
9888
                                           self.op.storage_type, st_args,
9889
                                           self.op.name,
9890
                                           constants.SO_FIX_CONSISTENCY)
9891
    result.Raise("Failed to repair storage unit '%s' on %s" %
9892
                 (self.op.name, self.op.node_name))
9893

    
9894

    
9895
class LUNodeEvacuate(NoHooksLU):
9896
  """Evacuates instances off a list of nodes.
9897

9898
  """
9899
  REQ_BGL = False
9900

    
9901
  def CheckArguments(self):
9902
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9903

    
9904
  def ExpandNames(self):
9905
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9906

    
9907
    if self.op.remote_node is not None:
9908
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9909
      assert self.op.remote_node
9910

    
9911
      if self.op.remote_node == self.op.node_name:
9912
        raise errors.OpPrereqError("Can not use evacuated node as a new"
9913
                                   " secondary node", errors.ECODE_INVAL)
9914

    
9915
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
9916
        raise errors.OpPrereqError("Without the use of an iallocator only"
9917
                                   " secondary instances can be evacuated",
9918
                                   errors.ECODE_INVAL)
9919

    
9920
    # Declare locks
9921
    self.share_locks = _ShareAll()
9922
    self.needed_locks = {
9923
      locking.LEVEL_INSTANCE: [],
9924
      locking.LEVEL_NODEGROUP: [],
9925
      locking.LEVEL_NODE: [],
9926
      }
9927

    
9928
    if self.op.remote_node is None:
9929
      # Iallocator will choose any node(s) in the same group
9930
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
9931
    else:
9932
      group_nodes = frozenset([self.op.remote_node])
9933

    
9934
    # Determine nodes to be locked
9935
    self.lock_nodes = set([self.op.node_name]) | group_nodes
9936

    
9937
  def _DetermineInstances(self):
9938
    """Builds list of instances to operate on.
9939

9940
    """
9941
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
9942

    
9943
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
9944
      # Primary instances only
9945
      inst_fn = _GetNodePrimaryInstances
9946
      assert self.op.remote_node is None, \
9947
        "Evacuating primary instances requires iallocator"
9948
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
9949
      # Secondary instances only
9950
      inst_fn = _GetNodeSecondaryInstances
9951
    else:
9952
      # All instances
9953
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
9954
      inst_fn = _GetNodeInstances
9955

    
9956
    return inst_fn(self.cfg, self.op.node_name)
9957

    
9958
  def DeclareLocks(self, level):
9959
    if level == locking.LEVEL_INSTANCE:
9960
      # Lock instances optimistically, needs verification once node and group
9961
      # locks have been acquired
9962
      self.needed_locks[locking.LEVEL_INSTANCE] = \
9963
        set(i.name for i in self._DetermineInstances())
9964

    
9965
    elif level == locking.LEVEL_NODEGROUP:
9966
      # Lock node groups optimistically, needs verification once nodes have
9967
      # been acquired
9968
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9969
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
9970

    
9971
    elif level == locking.LEVEL_NODE:
9972
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
9973

    
9974
  def CheckPrereq(self):
9975
    # Verify locks
9976
    owned_instances = self.glm.list_owned(locking.LEVEL_INSTANCE)
9977
    owned_nodes = self.glm.list_owned(locking.LEVEL_NODE)
9978
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
9979

    
9980
    assert owned_nodes == self.lock_nodes
9981

    
9982
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
9983
    if owned_groups != wanted_groups:
9984
      raise errors.OpExecError("Node groups changed since locks were acquired,"
9985
                               " current groups are '%s', used to be '%s'" %
9986
                               (utils.CommaJoin(wanted_groups),
9987
                                utils.CommaJoin(owned_groups)))
9988

    
9989
    # Determine affected instances
9990
    self.instances = self._DetermineInstances()
9991
    self.instance_names = [i.name for i in self.instances]
9992

    
9993
    if set(self.instance_names) != owned_instances:
9994
      raise errors.OpExecError("Instances on node '%s' changed since locks"
9995
                               " were acquired, current instances are '%s',"
9996
                               " used to be '%s'" %
9997
                               (self.op.node_name,
9998
                                utils.CommaJoin(self.instance_names),
9999
                                utils.CommaJoin(owned_instances)))
10000

    
10001
    if self.instance_names:
10002
      self.LogInfo("Evacuating instances from node '%s': %s",
10003
                   self.op.node_name,
10004
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10005
    else:
10006
      self.LogInfo("No instances to evacuate from node '%s'",
10007
                   self.op.node_name)
10008

    
10009
    if self.op.remote_node is not None:
10010
      for i in self.instances:
10011
        if i.primary_node == self.op.remote_node:
10012
          raise errors.OpPrereqError("Node %s is the primary node of"
10013
                                     " instance %s, cannot use it as"
10014
                                     " secondary" %
10015
                                     (self.op.remote_node, i.name),
10016
                                     errors.ECODE_INVAL)
10017

    
10018
  def Exec(self, feedback_fn):
10019
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10020

    
10021
    if not self.instance_names:
10022
      # No instances to evacuate
10023
      jobs = []
10024

    
10025
    elif self.op.iallocator is not None:
10026
      # TODO: Implement relocation to other group
10027
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10028
                       evac_mode=self.op.mode,
10029
                       instances=list(self.instance_names))
10030

    
10031
      ial.Run(self.op.iallocator)
10032

    
10033
      if not ial.success:
10034
        raise errors.OpPrereqError("Can't compute node evacuation using"
10035
                                   " iallocator '%s': %s" %
10036
                                   (self.op.iallocator, ial.info),
10037
                                   errors.ECODE_NORES)
10038

    
10039
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10040

    
10041
    elif self.op.remote_node is not None:
10042
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10043
      jobs = [
10044
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10045
                                        remote_node=self.op.remote_node,
10046
                                        disks=[],
10047
                                        mode=constants.REPLACE_DISK_CHG,
10048
                                        early_release=self.op.early_release)]
10049
        for instance_name in self.instance_names
10050
        ]
10051

    
10052
    else:
10053
      raise errors.ProgrammerError("No iallocator or remote node")
10054

    
10055
    return ResultWithJobs(jobs)
10056

    
10057

    
10058
def _SetOpEarlyRelease(early_release, op):
10059
  """Sets C{early_release} flag on opcodes if available.
10060

10061
  """
10062
  try:
10063
    op.early_release = early_release
10064
  except AttributeError:
10065
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10066

    
10067
  return op
10068

    
10069

    
10070
def _NodeEvacDest(use_nodes, group, nodes):
10071
  """Returns group or nodes depending on caller's choice.
10072

10073
  """
10074
  if use_nodes:
10075
    return utils.CommaJoin(nodes)
10076
  else:
10077
    return group
10078

    
10079

    
10080
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10081
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10082

10083
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10084
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10085

10086
  @type lu: L{LogicalUnit}
10087
  @param lu: Logical unit instance
10088
  @type alloc_result: tuple/list
10089
  @param alloc_result: Result from iallocator
10090
  @type early_release: bool
10091
  @param early_release: Whether to release locks early if possible
10092
  @type use_nodes: bool
10093
  @param use_nodes: Whether to display node names instead of groups
10094

10095
  """
10096
  (moved, failed, jobs) = alloc_result
10097

    
10098
  if failed:
10099
    lu.LogWarning("Unable to evacuate instances %s",
10100
                  utils.CommaJoin("%s (%s)" % (name, reason)
10101
                                  for (name, reason) in failed))
10102

    
10103
  if moved:
10104
    lu.LogInfo("Instances to be moved: %s",
10105
               utils.CommaJoin("%s (to %s)" %
10106
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10107
                               for (name, group, nodes) in moved))
10108

    
10109
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10110
              map(opcodes.OpCode.LoadOpCode, ops))
10111
          for ops in jobs]
10112

    
10113

    
10114
class LUInstanceGrowDisk(LogicalUnit):
10115
  """Grow a disk of an instance.
10116

10117
  """
10118
  HPATH = "disk-grow"
10119
  HTYPE = constants.HTYPE_INSTANCE
10120
  REQ_BGL = False
10121

    
10122
  def ExpandNames(self):
10123
    self._ExpandAndLockInstance()
10124
    self.needed_locks[locking.LEVEL_NODE] = []
10125
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10126

    
10127
  def DeclareLocks(self, level):
10128
    if level == locking.LEVEL_NODE:
10129
      self._LockInstancesNodes()
10130

    
10131
  def BuildHooksEnv(self):
10132
    """Build hooks env.
10133

10134
    This runs on the master, the primary and all the secondaries.
10135

10136
    """
10137
    env = {
10138
      "DISK": self.op.disk,
10139
      "AMOUNT": self.op.amount,
10140
      }
10141
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10142
    return env
10143

    
10144
  def BuildHooksNodes(self):
10145
    """Build hooks nodes.
10146

10147
    """
10148
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10149
    return (nl, nl)
10150

    
10151
  def CheckPrereq(self):
10152
    """Check prerequisites.
10153

10154
    This checks that the instance is in the cluster.
10155

10156
    """
10157
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10158
    assert instance is not None, \
10159
      "Cannot retrieve locked instance %s" % self.op.instance_name
10160
    nodenames = list(instance.all_nodes)
10161
    for node in nodenames:
10162
      _CheckNodeOnline(self, node)
10163

    
10164
    self.instance = instance
10165

    
10166
    if instance.disk_template not in constants.DTS_GROWABLE:
10167
      raise errors.OpPrereqError("Instance's disk layout does not support"
10168
                                 " growing", errors.ECODE_INVAL)
10169

    
10170
    self.disk = instance.FindDisk(self.op.disk)
10171

    
10172
    if instance.disk_template not in (constants.DT_FILE,
10173
                                      constants.DT_SHARED_FILE):
10174
      # TODO: check the free disk space for file, when that feature will be
10175
      # supported
10176
      _CheckNodesFreeDiskPerVG(self, nodenames,
10177
                               self.disk.ComputeGrowth(self.op.amount))
10178

    
10179
  def Exec(self, feedback_fn):
10180
    """Execute disk grow.
10181

10182
    """
10183
    instance = self.instance
10184
    disk = self.disk
10185

    
10186
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10187
    if not disks_ok:
10188
      raise errors.OpExecError("Cannot activate block device to grow")
10189

    
10190
    # First run all grow ops in dry-run mode
10191
    for node in instance.all_nodes:
10192
      self.cfg.SetDiskID(disk, node)
10193
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10194
      result.Raise("Grow request failed to node %s" % node)
10195

    
10196
    # We know that (as far as we can test) operations across different
10197
    # nodes will succeed, time to run it for real
10198
    for node in instance.all_nodes:
10199
      self.cfg.SetDiskID(disk, node)
10200
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10201
      result.Raise("Grow request failed to node %s" % node)
10202

    
10203
      # TODO: Rewrite code to work properly
10204
      # DRBD goes into sync mode for a short amount of time after executing the
10205
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10206
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10207
      # time is a work-around.
10208
      time.sleep(5)
10209

    
10210
    disk.RecordGrow(self.op.amount)
10211
    self.cfg.Update(instance, feedback_fn)
10212
    if self.op.wait_for_sync:
10213
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10214
      if disk_abort:
10215
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10216
                             " status; please check the instance")
10217
      if not instance.admin_up:
10218
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10219
    elif not instance.admin_up:
10220
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10221
                           " not supposed to be running because no wait for"
10222
                           " sync mode was requested")
10223

    
10224

    
10225
class LUInstanceQueryData(NoHooksLU):
10226
  """Query runtime instance data.
10227

10228
  """
10229
  REQ_BGL = False
10230

    
10231
  def ExpandNames(self):
10232
    self.needed_locks = {}
10233

    
10234
    # Use locking if requested or when non-static information is wanted
10235
    if not (self.op.static or self.op.use_locking):
10236
      self.LogWarning("Non-static data requested, locks need to be acquired")
10237
      self.op.use_locking = True
10238

    
10239
    if self.op.instances or not self.op.use_locking:
10240
      # Expand instance names right here
10241
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10242
    else:
10243
      # Will use acquired locks
10244
      self.wanted_names = None
10245

    
10246
    if self.op.use_locking:
10247
      self.share_locks = _ShareAll()
10248

    
10249
      if self.wanted_names is None:
10250
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10251
      else:
10252
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10253

    
10254
      self.needed_locks[locking.LEVEL_NODE] = []
10255
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10256

    
10257
  def DeclareLocks(self, level):
10258
    if self.op.use_locking and level == locking.LEVEL_NODE:
10259
      self._LockInstancesNodes()
10260

    
10261
  def CheckPrereq(self):
10262
    """Check prerequisites.
10263

10264
    This only checks the optional instance list against the existing names.
10265

10266
    """
10267
    if self.wanted_names is None:
10268
      assert self.op.use_locking, "Locking was not used"
10269
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
10270

    
10271
    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
10272
                             for name in self.wanted_names]
10273

    
10274
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10275
    """Returns the status of a block device
10276

10277
    """
10278
    if self.op.static or not node:
10279
      return None
10280

    
10281
    self.cfg.SetDiskID(dev, node)
10282

    
10283
    result = self.rpc.call_blockdev_find(node, dev)
10284
    if result.offline:
10285
      return None
10286

    
10287
    result.Raise("Can't compute disk status for %s" % instance_name)
10288

    
10289
    status = result.payload
10290
    if status is None:
10291
      return None
10292

    
10293
    return (status.dev_path, status.major, status.minor,
10294
            status.sync_percent, status.estimated_time,
10295
            status.is_degraded, status.ldisk_status)
10296

    
10297
  def _ComputeDiskStatus(self, instance, snode, dev):
10298
    """Compute block device status.
10299

10300
    """
10301
    if dev.dev_type in constants.LDS_DRBD:
10302
      # we change the snode then (otherwise we use the one passed in)
10303
      if dev.logical_id[0] == instance.primary_node:
10304
        snode = dev.logical_id[1]
10305
      else:
10306
        snode = dev.logical_id[0]
10307

    
10308
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10309
                                              instance.name, dev)
10310
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10311

    
10312
    if dev.children:
10313
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10314
                                        instance, snode),
10315
                         dev.children)
10316
    else:
10317
      dev_children = []
10318

    
10319
    return {
10320
      "iv_name": dev.iv_name,
10321
      "dev_type": dev.dev_type,
10322
      "logical_id": dev.logical_id,
10323
      "physical_id": dev.physical_id,
10324
      "pstatus": dev_pstatus,
10325
      "sstatus": dev_sstatus,
10326
      "children": dev_children,
10327
      "mode": dev.mode,
10328
      "size": dev.size,
10329
      }
10330

    
10331
  def Exec(self, feedback_fn):
10332
    """Gather and return data"""
10333
    result = {}
10334

    
10335
    cluster = self.cfg.GetClusterInfo()
10336

    
10337
    for instance in self.wanted_instances:
10338
      pnode = self.cfg.GetNodeInfo(instance.primary_node)
10339

    
10340
      if self.op.static or pnode.offline:
10341
        remote_state = None
10342
        if pnode.offline:
10343
          self.LogWarning("Primary node %s is marked offline, returning static"
10344
                          " information only for instance %s" %
10345
                          (pnode.name, instance.name))
10346
      else:
10347
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10348
                                                  instance.name,
10349
                                                  instance.hypervisor)
10350
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10351
        remote_info = remote_info.payload
10352
        if remote_info and "state" in remote_info:
10353
          remote_state = "up"
10354
        else:
10355
          remote_state = "down"
10356

    
10357
      if instance.admin_up:
10358
        config_state = "up"
10359
      else:
10360
        config_state = "down"
10361

    
10362
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10363
                  instance.disks)
10364

    
10365
      result[instance.name] = {
10366
        "name": instance.name,
10367
        "config_state": config_state,
10368
        "run_state": remote_state,
10369
        "pnode": instance.primary_node,
10370
        "snodes": instance.secondary_nodes,
10371
        "os": instance.os,
10372
        # this happens to be the same format used for hooks
10373
        "nics": _NICListToTuple(self, instance.nics),
10374
        "disk_template": instance.disk_template,
10375
        "disks": disks,
10376
        "hypervisor": instance.hypervisor,
10377
        "network_port": instance.network_port,
10378
        "hv_instance": instance.hvparams,
10379
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10380
        "be_instance": instance.beparams,
10381
        "be_actual": cluster.FillBE(instance),
10382
        "os_instance": instance.osparams,
10383
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10384
        "serial_no": instance.serial_no,
10385
        "mtime": instance.mtime,
10386
        "ctime": instance.ctime,
10387
        "uuid": instance.uuid,
10388
        }
10389

    
10390
    return result
10391

    
10392

    
10393
class LUInstanceSetParams(LogicalUnit):
10394
  """Modifies an instances's parameters.
10395

10396
  """
10397
  HPATH = "instance-modify"
10398
  HTYPE = constants.HTYPE_INSTANCE
10399
  REQ_BGL = False
10400

    
10401
  def CheckArguments(self):
10402
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10403
            self.op.hvparams or self.op.beparams or self.op.os_name):
10404
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10405

    
10406
    if self.op.hvparams:
10407
      _CheckGlobalHvParams(self.op.hvparams)
10408

    
10409
    # Disk validation
10410
    disk_addremove = 0
10411
    for disk_op, disk_dict in self.op.disks:
10412
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10413
      if disk_op == constants.DDM_REMOVE:
10414
        disk_addremove += 1
10415
        continue
10416
      elif disk_op == constants.DDM_ADD:
10417
        disk_addremove += 1
10418
      else:
10419
        if not isinstance(disk_op, int):
10420
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10421
        if not isinstance(disk_dict, dict):
10422
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10423
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10424

    
10425
      if disk_op == constants.DDM_ADD:
10426
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10427
        if mode not in constants.DISK_ACCESS_SET:
10428
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10429
                                     errors.ECODE_INVAL)
10430
        size = disk_dict.get(constants.IDISK_SIZE, None)
10431
        if size is None:
10432
          raise errors.OpPrereqError("Required disk parameter size missing",
10433
                                     errors.ECODE_INVAL)
10434
        try:
10435
          size = int(size)
10436
        except (TypeError, ValueError), err:
10437
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10438
                                     str(err), errors.ECODE_INVAL)
10439
        disk_dict[constants.IDISK_SIZE] = size
10440
      else:
10441
        # modification of disk
10442
        if constants.IDISK_SIZE in disk_dict:
10443
          raise errors.OpPrereqError("Disk size change not possible, use"
10444
                                     " grow-disk", errors.ECODE_INVAL)
10445

    
10446
    if disk_addremove > 1:
10447
      raise errors.OpPrereqError("Only one disk add or remove operation"
10448
                                 " supported at a time", errors.ECODE_INVAL)
10449

    
10450
    if self.op.disks and self.op.disk_template is not None:
10451
      raise errors.OpPrereqError("Disk template conversion and other disk"
10452
                                 " changes not supported at the same time",
10453
                                 errors.ECODE_INVAL)
10454

    
10455
    if (self.op.disk_template and
10456
        self.op.disk_template in constants.DTS_INT_MIRROR and
10457
        self.op.remote_node is None):
10458
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10459
                                 " one requires specifying a secondary node",
10460
                                 errors.ECODE_INVAL)
10461

    
10462
    # NIC validation
10463
    nic_addremove = 0
10464
    for nic_op, nic_dict in self.op.nics:
10465
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10466
      if nic_op == constants.DDM_REMOVE:
10467
        nic_addremove += 1
10468
        continue
10469
      elif nic_op == constants.DDM_ADD:
10470
        nic_addremove += 1
10471
      else:
10472
        if not isinstance(nic_op, int):
10473
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10474
        if not isinstance(nic_dict, dict):
10475
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10476
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10477

    
10478
      # nic_dict should be a dict
10479
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10480
      if nic_ip is not None:
10481
        if nic_ip.lower() == constants.VALUE_NONE:
10482
          nic_dict[constants.INIC_IP] = None
10483
        else:
10484
          if not netutils.IPAddress.IsValid(nic_ip):
10485
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10486
                                       errors.ECODE_INVAL)
10487

    
10488
      nic_bridge = nic_dict.get("bridge", None)
10489
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10490
      if nic_bridge and nic_link:
10491
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10492
                                   " at the same time", errors.ECODE_INVAL)
10493
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10494
        nic_dict["bridge"] = None
10495
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10496
        nic_dict[constants.INIC_LINK] = None
10497

    
10498
      if nic_op == constants.DDM_ADD:
10499
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10500
        if nic_mac is None:
10501
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10502

    
10503
      if constants.INIC_MAC in nic_dict:
10504
        nic_mac = nic_dict[constants.INIC_MAC]
10505
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10506
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10507

    
10508
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10509
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10510
                                     " modifying an existing nic",
10511
                                     errors.ECODE_INVAL)
10512

    
10513
    if nic_addremove > 1:
10514
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10515
                                 " supported at a time", errors.ECODE_INVAL)
10516

    
10517
  def ExpandNames(self):
10518
    self._ExpandAndLockInstance()
10519
    self.needed_locks[locking.LEVEL_NODE] = []
10520
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10521

    
10522
  def DeclareLocks(self, level):
10523
    if level == locking.LEVEL_NODE:
10524
      self._LockInstancesNodes()
10525
      if self.op.disk_template and self.op.remote_node:
10526
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10527
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10528

    
10529
  def BuildHooksEnv(self):
10530
    """Build hooks env.
10531

10532
    This runs on the master, primary and secondaries.
10533

10534
    """
10535
    args = dict()
10536
    if constants.BE_MEMORY in self.be_new:
10537
      args["memory"] = self.be_new[constants.BE_MEMORY]
10538
    if constants.BE_VCPUS in self.be_new:
10539
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10540
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10541
    # information at all.
10542
    if self.op.nics:
10543
      args["nics"] = []
10544
      nic_override = dict(self.op.nics)
10545
      for idx, nic in enumerate(self.instance.nics):
10546
        if idx in nic_override:
10547
          this_nic_override = nic_override[idx]
10548
        else:
10549
          this_nic_override = {}
10550
        if constants.INIC_IP in this_nic_override:
10551
          ip = this_nic_override[constants.INIC_IP]
10552
        else:
10553
          ip = nic.ip
10554
        if constants.INIC_MAC in this_nic_override:
10555
          mac = this_nic_override[constants.INIC_MAC]
10556
        else:
10557
          mac = nic.mac
10558
        if idx in self.nic_pnew:
10559
          nicparams = self.nic_pnew[idx]
10560
        else:
10561
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10562
        mode = nicparams[constants.NIC_MODE]
10563
        link = nicparams[constants.NIC_LINK]
10564
        args["nics"].append((ip, mac, mode, link))
10565
      if constants.DDM_ADD in nic_override:
10566
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10567
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10568
        nicparams = self.nic_pnew[constants.DDM_ADD]
10569
        mode = nicparams[constants.NIC_MODE]
10570
        link = nicparams[constants.NIC_LINK]
10571
        args["nics"].append((ip, mac, mode, link))
10572
      elif constants.DDM_REMOVE in nic_override:
10573
        del args["nics"][-1]
10574

    
10575
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10576
    if self.op.disk_template:
10577
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10578

    
10579
    return env
10580

    
10581
  def BuildHooksNodes(self):
10582
    """Build hooks nodes.
10583

10584
    """
10585
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10586
    return (nl, nl)
10587

    
10588
  def CheckPrereq(self):
10589
    """Check prerequisites.
10590

10591
    This only checks the instance list against the existing names.
10592

10593
    """
10594
    # checking the new params on the primary/secondary nodes
10595

    
10596
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10597
    cluster = self.cluster = self.cfg.GetClusterInfo()
10598
    assert self.instance is not None, \
10599
      "Cannot retrieve locked instance %s" % self.op.instance_name
10600
    pnode = instance.primary_node
10601
    nodelist = list(instance.all_nodes)
10602

    
10603
    # OS change
10604
    if self.op.os_name and not self.op.force:
10605
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10606
                      self.op.force_variant)
10607
      instance_os = self.op.os_name
10608
    else:
10609
      instance_os = instance.os
10610

    
10611
    if self.op.disk_template:
10612
      if instance.disk_template == self.op.disk_template:
10613
        raise errors.OpPrereqError("Instance already has disk template %s" %
10614
                                   instance.disk_template, errors.ECODE_INVAL)
10615

    
10616
      if (instance.disk_template,
10617
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10618
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10619
                                   " %s to %s" % (instance.disk_template,
10620
                                                  self.op.disk_template),
10621
                                   errors.ECODE_INVAL)
10622
      _CheckInstanceDown(self, instance, "cannot change disk template")
10623
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10624
        if self.op.remote_node == pnode:
10625
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10626
                                     " as the primary node of the instance" %
10627
                                     self.op.remote_node, errors.ECODE_STATE)
10628
        _CheckNodeOnline(self, self.op.remote_node)
10629
        _CheckNodeNotDrained(self, self.op.remote_node)
10630
        # FIXME: here we assume that the old instance type is DT_PLAIN
10631
        assert instance.disk_template == constants.DT_PLAIN
10632
        disks = [{constants.IDISK_SIZE: d.size,
10633
                  constants.IDISK_VG: d.logical_id[0]}
10634
                 for d in instance.disks]
10635
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10636
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10637

    
10638
    # hvparams processing
10639
    if self.op.hvparams:
10640
      hv_type = instance.hypervisor
10641
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10642
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10643
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10644

    
10645
      # local check
10646
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10647
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10648
      self.hv_new = hv_new # the new actual values
10649
      self.hv_inst = i_hvdict # the new dict (without defaults)
10650
    else:
10651
      self.hv_new = self.hv_inst = {}
10652

    
10653
    # beparams processing
10654
    if self.op.beparams:
10655
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10656
                                   use_none=True)
10657
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10658
      be_new = cluster.SimpleFillBE(i_bedict)
10659
      self.be_new = be_new # the new actual values
10660
      self.be_inst = i_bedict # the new dict (without defaults)
10661
    else:
10662
      self.be_new = self.be_inst = {}
10663
    be_old = cluster.FillBE(instance)
10664

    
10665
    # osparams processing
10666
    if self.op.osparams:
10667
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10668
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10669
      self.os_inst = i_osdict # the new dict (without defaults)
10670
    else:
10671
      self.os_inst = {}
10672

    
10673
    self.warn = []
10674

    
10675
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10676
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10677
      mem_check_list = [pnode]
10678
      if be_new[constants.BE_AUTO_BALANCE]:
10679
        # either we changed auto_balance to yes or it was from before
10680
        mem_check_list.extend(instance.secondary_nodes)
10681
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10682
                                                  instance.hypervisor)
10683
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10684
                                         instance.hypervisor)
10685
      pninfo = nodeinfo[pnode]
10686
      msg = pninfo.fail_msg
10687
      if msg:
10688
        # Assume the primary node is unreachable and go ahead
10689
        self.warn.append("Can't get info from primary node %s: %s" %
10690
                         (pnode,  msg))
10691
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
10692
        self.warn.append("Node data from primary node %s doesn't contain"
10693
                         " free memory information" % pnode)
10694
      elif instance_info.fail_msg:
10695
        self.warn.append("Can't get instance runtime information: %s" %
10696
                        instance_info.fail_msg)
10697
      else:
10698
        if instance_info.payload:
10699
          current_mem = int(instance_info.payload["memory"])
10700
        else:
10701
          # Assume instance not running
10702
          # (there is a slight race condition here, but it's not very probable,
10703
          # and we have no other way to check)
10704
          current_mem = 0
10705
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10706
                    pninfo.payload["memory_free"])
10707
        if miss_mem > 0:
10708
          raise errors.OpPrereqError("This change will prevent the instance"
10709
                                     " from starting, due to %d MB of memory"
10710
                                     " missing on its primary node" % miss_mem,
10711
                                     errors.ECODE_NORES)
10712

    
10713
      if be_new[constants.BE_AUTO_BALANCE]:
10714
        for node, nres in nodeinfo.items():
10715
          if node not in instance.secondary_nodes:
10716
            continue
10717
          nres.Raise("Can't get info from secondary node %s" % node,
10718
                     prereq=True, ecode=errors.ECODE_STATE)
10719
          if not isinstance(nres.payload.get("memory_free", None), int):
10720
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10721
                                       " memory information" % node,
10722
                                       errors.ECODE_STATE)
10723
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10724
            raise errors.OpPrereqError("This change will prevent the instance"
10725
                                       " from failover to its secondary node"
10726
                                       " %s, due to not enough memory" % node,
10727
                                       errors.ECODE_STATE)
10728

    
10729
    # NIC processing
10730
    self.nic_pnew = {}
10731
    self.nic_pinst = {}
10732
    for nic_op, nic_dict in self.op.nics:
10733
      if nic_op == constants.DDM_REMOVE:
10734
        if not instance.nics:
10735
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10736
                                     errors.ECODE_INVAL)
10737
        continue
10738
      if nic_op != constants.DDM_ADD:
10739
        # an existing nic
10740
        if not instance.nics:
10741
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10742
                                     " no NICs" % nic_op,
10743
                                     errors.ECODE_INVAL)
10744
        if nic_op < 0 or nic_op >= len(instance.nics):
10745
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10746
                                     " are 0 to %d" %
10747
                                     (nic_op, len(instance.nics) - 1),
10748
                                     errors.ECODE_INVAL)
10749
        old_nic_params = instance.nics[nic_op].nicparams
10750
        old_nic_ip = instance.nics[nic_op].ip
10751
      else:
10752
        old_nic_params = {}
10753
        old_nic_ip = None
10754

    
10755
      update_params_dict = dict([(key, nic_dict[key])
10756
                                 for key in constants.NICS_PARAMETERS
10757
                                 if key in nic_dict])
10758

    
10759
      if "bridge" in nic_dict:
10760
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10761

    
10762
      new_nic_params = _GetUpdatedParams(old_nic_params,
10763
                                         update_params_dict)
10764
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10765
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10766
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10767
      self.nic_pinst[nic_op] = new_nic_params
10768
      self.nic_pnew[nic_op] = new_filled_nic_params
10769
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10770

    
10771
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
10772
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10773
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10774
        if msg:
10775
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10776
          if self.op.force:
10777
            self.warn.append(msg)
10778
          else:
10779
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10780
      if new_nic_mode == constants.NIC_MODE_ROUTED:
10781
        if constants.INIC_IP in nic_dict:
10782
          nic_ip = nic_dict[constants.INIC_IP]
10783
        else:
10784
          nic_ip = old_nic_ip
10785
        if nic_ip is None:
10786
          raise errors.OpPrereqError("Cannot set the nic ip to None"
10787
                                     " on a routed nic", errors.ECODE_INVAL)
10788
      if constants.INIC_MAC in nic_dict:
10789
        nic_mac = nic_dict[constants.INIC_MAC]
10790
        if nic_mac is None:
10791
          raise errors.OpPrereqError("Cannot set the nic mac to None",
10792
                                     errors.ECODE_INVAL)
10793
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10794
          # otherwise generate the mac
10795
          nic_dict[constants.INIC_MAC] = \
10796
            self.cfg.GenerateMAC(self.proc.GetECId())
10797
        else:
10798
          # or validate/reserve the current one
10799
          try:
10800
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10801
          except errors.ReservationError:
10802
            raise errors.OpPrereqError("MAC address %s already in use"
10803
                                       " in cluster" % nic_mac,
10804
                                       errors.ECODE_NOTUNIQUE)
10805

    
10806
    # DISK processing
10807
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10808
      raise errors.OpPrereqError("Disk operations not supported for"
10809
                                 " diskless instances",
10810
                                 errors.ECODE_INVAL)
10811
    for disk_op, _ in self.op.disks:
10812
      if disk_op == constants.DDM_REMOVE:
10813
        if len(instance.disks) == 1:
10814
          raise errors.OpPrereqError("Cannot remove the last disk of"
10815
                                     " an instance", errors.ECODE_INVAL)
10816
        _CheckInstanceDown(self, instance, "cannot remove disks")
10817

    
10818
      if (disk_op == constants.DDM_ADD and
10819
          len(instance.disks) >= constants.MAX_DISKS):
10820
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10821
                                   " add more" % constants.MAX_DISKS,
10822
                                   errors.ECODE_STATE)
10823
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10824
        # an existing disk
10825
        if disk_op < 0 or disk_op >= len(instance.disks):
10826
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
10827
                                     " are 0 to %d" %
10828
                                     (disk_op, len(instance.disks) - 1),
10829
                                     errors.ECODE_INVAL)
10830

    
10831
    return
10832

    
10833
  def _ConvertPlainToDrbd(self, feedback_fn):
10834
    """Converts an instance from plain to drbd.
10835

10836
    """
10837
    feedback_fn("Converting template to drbd")
10838
    instance = self.instance
10839
    pnode = instance.primary_node
10840
    snode = self.op.remote_node
10841

    
10842
    # create a fake disk info for _GenerateDiskTemplate
10843
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10844
                  constants.IDISK_VG: d.logical_id[0]}
10845
                 for d in instance.disks]
10846
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10847
                                      instance.name, pnode, [snode],
10848
                                      disk_info, None, None, 0, feedback_fn)
10849
    info = _GetInstanceInfoText(instance)
10850
    feedback_fn("Creating aditional volumes...")
10851
    # first, create the missing data and meta devices
10852
    for disk in new_disks:
10853
      # unfortunately this is... not too nice
10854
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10855
                            info, True)
10856
      for child in disk.children:
10857
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
10858
    # at this stage, all new LVs have been created, we can rename the
10859
    # old ones
10860
    feedback_fn("Renaming original volumes...")
10861
    rename_list = [(o, n.children[0].logical_id)
10862
                   for (o, n) in zip(instance.disks, new_disks)]
10863
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
10864
    result.Raise("Failed to rename original LVs")
10865

    
10866
    feedback_fn("Initializing DRBD devices...")
10867
    # all child devices are in place, we can now create the DRBD devices
10868
    for disk in new_disks:
10869
      for node in [pnode, snode]:
10870
        f_create = node == pnode
10871
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10872

    
10873
    # at this point, the instance has been modified
10874
    instance.disk_template = constants.DT_DRBD8
10875
    instance.disks = new_disks
10876
    self.cfg.Update(instance, feedback_fn)
10877

    
10878
    # disks are created, waiting for sync
10879
    disk_abort = not _WaitForSync(self, instance,
10880
                                  oneshot=not self.op.wait_for_sync)
10881
    if disk_abort:
10882
      raise errors.OpExecError("There are some degraded disks for"
10883
                               " this instance, please cleanup manually")
10884

    
10885
  def _ConvertDrbdToPlain(self, feedback_fn):
10886
    """Converts an instance from drbd to plain.
10887

10888
    """
10889
    instance = self.instance
10890
    assert len(instance.secondary_nodes) == 1
10891
    pnode = instance.primary_node
10892
    snode = instance.secondary_nodes[0]
10893
    feedback_fn("Converting template to plain")
10894

    
10895
    old_disks = instance.disks
10896
    new_disks = [d.children[0] for d in old_disks]
10897

    
10898
    # copy over size and mode
10899
    for parent, child in zip(old_disks, new_disks):
10900
      child.size = parent.size
10901
      child.mode = parent.mode
10902

    
10903
    # update instance structure
10904
    instance.disks = new_disks
10905
    instance.disk_template = constants.DT_PLAIN
10906
    self.cfg.Update(instance, feedback_fn)
10907

    
10908
    feedback_fn("Removing volumes on the secondary node...")
10909
    for disk in old_disks:
10910
      self.cfg.SetDiskID(disk, snode)
10911
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10912
      if msg:
10913
        self.LogWarning("Could not remove block device %s on node %s,"
10914
                        " continuing anyway: %s", disk.iv_name, snode, msg)
10915

    
10916
    feedback_fn("Removing unneeded volumes on the primary node...")
10917
    for idx, disk in enumerate(old_disks):
10918
      meta = disk.children[1]
10919
      self.cfg.SetDiskID(meta, pnode)
10920
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10921
      if msg:
10922
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
10923
                        " continuing anyway: %s", idx, pnode, msg)
10924

    
10925
  def Exec(self, feedback_fn):
10926
    """Modifies an instance.
10927

10928
    All parameters take effect only at the next restart of the instance.
10929

10930
    """
10931
    # Process here the warnings from CheckPrereq, as we don't have a
10932
    # feedback_fn there.
10933
    for warn in self.warn:
10934
      feedback_fn("WARNING: %s" % warn)
10935

    
10936
    result = []
10937
    instance = self.instance
10938
    # disk changes
10939
    for disk_op, disk_dict in self.op.disks:
10940
      if disk_op == constants.DDM_REMOVE:
10941
        # remove the last disk
10942
        device = instance.disks.pop()
10943
        device_idx = len(instance.disks)
10944
        for node, disk in device.ComputeNodeTree(instance.primary_node):
10945
          self.cfg.SetDiskID(disk, node)
10946
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10947
          if msg:
10948
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
10949
                            " continuing anyway", device_idx, node, msg)
10950
        result.append(("disk/%d" % device_idx, "remove"))
10951
      elif disk_op == constants.DDM_ADD:
10952
        # add a new disk
10953
        if instance.disk_template in (constants.DT_FILE,
10954
                                        constants.DT_SHARED_FILE):
10955
          file_driver, file_path = instance.disks[0].logical_id
10956
          file_path = os.path.dirname(file_path)
10957
        else:
10958
          file_driver = file_path = None
10959
        disk_idx_base = len(instance.disks)
10960
        new_disk = _GenerateDiskTemplate(self,
10961
                                         instance.disk_template,
10962
                                         instance.name, instance.primary_node,
10963
                                         instance.secondary_nodes,
10964
                                         [disk_dict],
10965
                                         file_path,
10966
                                         file_driver,
10967
                                         disk_idx_base, feedback_fn)[0]
10968
        instance.disks.append(new_disk)
10969
        info = _GetInstanceInfoText(instance)
10970

    
10971
        logging.info("Creating volume %s for instance %s",
10972
                     new_disk.iv_name, instance.name)
10973
        # Note: this needs to be kept in sync with _CreateDisks
10974
        #HARDCODE
10975
        for node in instance.all_nodes:
10976
          f_create = node == instance.primary_node
10977
          try:
10978
            _CreateBlockDev(self, node, instance, new_disk,
10979
                            f_create, info, f_create)
10980
          except errors.OpExecError, err:
10981
            self.LogWarning("Failed to create volume %s (%s) on"
10982
                            " node %s: %s",
10983
                            new_disk.iv_name, new_disk, node, err)
10984
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10985
                       (new_disk.size, new_disk.mode)))
10986
      else:
10987
        # change a given disk
10988
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10989
        result.append(("disk.mode/%d" % disk_op,
10990
                       disk_dict[constants.IDISK_MODE]))
10991

    
10992
    if self.op.disk_template:
10993
      r_shut = _ShutdownInstanceDisks(self, instance)
10994
      if not r_shut:
10995
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10996
                                 " proceed with disk template conversion")
10997
      mode = (instance.disk_template, self.op.disk_template)
10998
      try:
10999
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11000
      except:
11001
        self.cfg.ReleaseDRBDMinors(instance.name)
11002
        raise
11003
      result.append(("disk_template", self.op.disk_template))
11004

    
11005
    # NIC changes
11006
    for nic_op, nic_dict in self.op.nics:
11007
      if nic_op == constants.DDM_REMOVE:
11008
        # remove the last nic
11009
        del instance.nics[-1]
11010
        result.append(("nic.%d" % len(instance.nics), "remove"))
11011
      elif nic_op == constants.DDM_ADD:
11012
        # mac and bridge should be set, by now
11013
        mac = nic_dict[constants.INIC_MAC]
11014
        ip = nic_dict.get(constants.INIC_IP, None)
11015
        nicparams = self.nic_pinst[constants.DDM_ADD]
11016
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11017
        instance.nics.append(new_nic)
11018
        result.append(("nic.%d" % (len(instance.nics) - 1),
11019
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11020
                       (new_nic.mac, new_nic.ip,
11021
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11022
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11023
                       )))
11024
      else:
11025
        for key in (constants.INIC_MAC, constants.INIC_IP):
11026
          if key in nic_dict:
11027
            setattr(instance.nics[nic_op], key, nic_dict[key])
11028
        if nic_op in self.nic_pinst:
11029
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11030
        for key, val in nic_dict.iteritems():
11031
          result.append(("nic.%s/%d" % (key, nic_op), val))
11032

    
11033
    # hvparams changes
11034
    if self.op.hvparams:
11035
      instance.hvparams = self.hv_inst
11036
      for key, val in self.op.hvparams.iteritems():
11037
        result.append(("hv/%s" % key, val))
11038

    
11039
    # beparams changes
11040
    if self.op.beparams:
11041
      instance.beparams = self.be_inst
11042
      for key, val in self.op.beparams.iteritems():
11043
        result.append(("be/%s" % key, val))
11044

    
11045
    # OS change
11046
    if self.op.os_name:
11047
      instance.os = self.op.os_name
11048

    
11049
    # osparams changes
11050
    if self.op.osparams:
11051
      instance.osparams = self.os_inst
11052
      for key, val in self.op.osparams.iteritems():
11053
        result.append(("os/%s" % key, val))
11054

    
11055
    self.cfg.Update(instance, feedback_fn)
11056

    
11057
    return result
11058

    
11059
  _DISK_CONVERSIONS = {
11060
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11061
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11062
    }
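  # Dispatch table used by Exec() for disk template conversions: it maps a
  # (current template, requested template) pair to the method implementing
  # that conversion; Exec() calls the chosen entry with (self, feedback_fn).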
11063

    
11064

    
11065
class LUBackupQuery(NoHooksLU):
11066
  """Query the exports list
11067

11068
  """
11069
  REQ_BGL = False
11070

    
11071
  def ExpandNames(self):
11072
    self.needed_locks = {}
11073
    self.share_locks[locking.LEVEL_NODE] = 1
11074
    if not self.op.nodes:
11075
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11076
    else:
11077
      self.needed_locks[locking.LEVEL_NODE] = \
11078
        _GetWantedNodes(self, self.op.nodes)
11079

    
11080
  def Exec(self, feedback_fn):
11081
    """Compute the list of all the exported system images.
11082

11083
    @rtype: dict
11084
    @return: a dictionary with the structure node->(export-list)
11085
        where export-list is a list of the instances exported on
11086
        that node.
11087

11088
    """
11089
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
11090
    rpcresult = self.rpc.call_export_list(self.nodes)
11091
    result = {}
11092
    for node in rpcresult:
11093
      if rpcresult[node].fail_msg:
11094
        result[node] = False
11095
      else:
11096
        result[node] = rpcresult[node].payload
11097

    
11098
    return result
11099

    
11100

    
11101
class LUBackupPrepare(NoHooksLU):
11102
  """Prepares an instance for an export and returns useful information.
11103

11104
  """
11105
  REQ_BGL = False
11106

    
11107
  def ExpandNames(self):
11108
    self._ExpandAndLockInstance()
11109

    
11110
  def CheckPrereq(self):
11111
    """Check prerequisites.
11112

11113
    """
11114
    instance_name = self.op.instance_name
11115

    
11116
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11117
    assert self.instance is not None, \
11118
          "Cannot retrieve locked instance %s" % self.op.instance_name
11119
    _CheckNodeOnline(self, self.instance.primary_node)
11120

    
11121
    self._cds = _GetClusterDomainSecret()
11122

    
11123
  def Exec(self, feedback_fn):
11124
    """Prepares an instance for an export.
11125

11126
    """
11127
    instance = self.instance
11128

    
11129
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11130
      salt = utils.GenerateSecret(8)
11131

    
11132
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11133
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11134
                                              constants.RIE_CERT_VALIDITY)
11135
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11136

    
11137
      (name, cert_pem) = result.payload
11138

    
11139
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11140
                                             cert_pem)
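      # The values returned below are meant for whoever drives the remote
      # export: a handshake token plus the X509 key name and the signed CA,
      # each authenticated with an HMAC keyed on the cluster domain secret.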
11141

    
11142
      return {
11143
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11144
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11145
                          salt),
11146
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11147
        }
11148

    
11149
    return None
11150

    
11151

    
11152
class LUBackupExport(LogicalUnit):
11153
  """Export an instance to an image in the cluster.
11154

11155
  """
11156
  HPATH = "instance-export"
11157
  HTYPE = constants.HTYPE_INSTANCE
11158
  REQ_BGL = False
11159

    
11160
  def CheckArguments(self):
11161
    """Check the arguments.
11162

11163
    """
11164
    self.x509_key_name = self.op.x509_key_name
11165
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11166

    
11167
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11168
      if not self.x509_key_name:
11169
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11170
                                   errors.ECODE_INVAL)
11171

    
11172
      if not self.dest_x509_ca_pem:
11173
        raise errors.OpPrereqError("Missing destination X509 CA",
11174
                                   errors.ECODE_INVAL)
11175

    
11176
  def ExpandNames(self):
11177
    self._ExpandAndLockInstance()
11178

    
11179
    # Lock all nodes for local exports
11180
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11181
      # FIXME: lock only instance primary and destination node
11182
      #
11183
      # Sad but true, for now we have to lock all nodes, as we don't know where
11184
      # the previous export might be, and in this LU we search for it and
11185
      # remove it from its current node. In the future we could fix this by:
11186
      #  - making a tasklet to search (share-lock all), then create the
11187
      #    new one, then one to remove, after
11188
      #  - removing the removal operation altogether
11189
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11190

    
11191
  def DeclareLocks(self, level):
11192
    """Last minute lock declaration."""
11193
    # All nodes are locked anyway, so nothing to do here.
11194

    
11195
  def BuildHooksEnv(self):
11196
    """Build hooks env.
11197

11198
    This will run on the master, primary node and target node.
11199

11200
    """
11201
    env = {
11202
      "EXPORT_MODE": self.op.mode,
11203
      "EXPORT_NODE": self.op.target_node,
11204
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11205
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11206
      # TODO: Generic function for boolean env variables
11207
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11208
      }
11209

    
11210
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11211

    
11212
    return env
11213

    
11214
  def BuildHooksNodes(self):
11215
    """Build hooks nodes.
11216

11217
    """
11218
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11219

    
11220
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11221
      nl.append(self.op.target_node)
11222

    
11223
    return (nl, nl)
11224

    
11225
  def CheckPrereq(self):
11226
    """Check prerequisites.
11227

11228
    This checks that the instance and node names are valid.
11229

11230
    """
11231
    instance_name = self.op.instance_name
11232

    
11233
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11234
    assert self.instance is not None, \
11235
          "Cannot retrieve locked instance %s" % self.op.instance_name
11236
    _CheckNodeOnline(self, self.instance.primary_node)
11237

    
11238
    if (self.op.remove_instance and self.instance.admin_up and
11239
        not self.op.shutdown):
11240
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11241
                                 " down before")
11242

    
11243
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11244
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11245
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11246
      assert self.dst_node is not None
11247

    
11248
      _CheckNodeOnline(self, self.dst_node.name)
11249
      _CheckNodeNotDrained(self, self.dst_node.name)
11250

    
11251
      self._cds = None
11252
      self.dest_disk_info = None
11253
      self.dest_x509_ca = None
11254

    
11255
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11256
      self.dst_node = None
11257

    
11258
      if len(self.op.target_node) != len(self.instance.disks):
11259
        raise errors.OpPrereqError(("Received destination information for %s"
11260
                                    " disks, but instance %s has %s disks") %
11261
                                   (len(self.op.target_node), instance_name,
11262
                                    len(self.instance.disks)),
11263
                                   errors.ECODE_INVAL)
11264

    
11265
      cds = _GetClusterDomainSecret()
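      # The key name and CA PEM are expected to come from LUBackupPrepare;
      # both carry an HMAC keyed on the cluster domain secret, verified below
      # so that only values issued by this cluster are accepted.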
11266

    
11267
      # Check X509 key name
11268
      try:
11269
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11270
      except (TypeError, ValueError), err:
11271
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11272

    
11273
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11274
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11275
                                   errors.ECODE_INVAL)
11276

    
11277
      # Load and verify CA
11278
      try:
11279
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11280
      except OpenSSL.crypto.Error, err:
11281
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11282
                                   (err, ), errors.ECODE_INVAL)
11283

    
11284
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11285
      if errcode is not None:
11286
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11287
                                   (msg, ), errors.ECODE_INVAL)
11288

    
11289
      self.dest_x509_ca = cert
11290

    
11291
      # Verify target information
11292
      disk_info = []
11293
      for idx, disk_data in enumerate(self.op.target_node):
11294
        try:
11295
          (host, port, magic) = \
11296
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11297
        except errors.GenericError, err:
11298
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11299
                                     (idx, err), errors.ECODE_INVAL)
11300

    
11301
        disk_info.append((host, port, magic))
11302

    
11303
      assert len(disk_info) == len(self.op.target_node)
11304
      self.dest_disk_info = disk_info
11305

    
11306
    else:
11307
      raise errors.ProgrammerError("Unhandled export mode %r" %
11308
                                   self.op.mode)
11309

    
11310
    # instance disk type verification
11311
    # TODO: Implement export support for file-based disks
11312
    for disk in self.instance.disks:
11313
      if disk.dev_type == constants.LD_FILE:
11314
        raise errors.OpPrereqError("Export not supported for instances with"
11315
                                   " file-based disks", errors.ECODE_INVAL)
11316

    
11317
  def _CleanupExports(self, feedback_fn):
11318
    """Removes exports of current instance from all other nodes.
11319

11320
    If an instance in a cluster with nodes A..D was exported to node C, its
11321
    exports will be removed from the nodes A, B and D.
11322

11323
    """
11324
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11325

    
11326
    nodelist = self.cfg.GetNodeList()
11327
    nodelist.remove(self.dst_node.name)
11328

    
11329
    # on one-node clusters nodelist will be empty after the removal
11330
    # if we proceed the backup would be removed because OpBackupQuery
11331
    # substitutes an empty list with the full cluster node list.
11332
    iname = self.instance.name
11333
    if nodelist:
11334
      feedback_fn("Removing old exports for instance %s" % iname)
11335
      exportlist = self.rpc.call_export_list(nodelist)
11336
      for node in exportlist:
11337
        if exportlist[node].fail_msg:
11338
          continue
11339
        if iname in exportlist[node].payload:
11340
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11341
          if msg:
11342
            self.LogWarning("Could not remove older export for instance %s"
11343
                            " on node %s: %s", iname, node, msg)
11344

    
11345
  def Exec(self, feedback_fn):
11346
    """Export an instance to an image in the cluster.
11347

11348
    """
11349
    assert self.op.mode in constants.EXPORT_MODES
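    # Overall flow: optionally shut the instance down, snapshot its disks
    # via ExportInstanceHelper, restart it if it was running, copy the
    # snapshots to the target node (local mode) or to the remote endpoints
    # (remote mode), then do the requested cleanup (instance removal, stale
    # exports).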
11350

    
11351
    instance = self.instance
11352
    src_node = instance.primary_node
11353

    
11354
    if self.op.shutdown:
11355
      # shutdown the instance, but not the disks
11356
      feedback_fn("Shutting down instance %s" % instance.name)
11357
      result = self.rpc.call_instance_shutdown(src_node, instance,
11358
                                               self.op.shutdown_timeout)
11359
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11360
      result.Raise("Could not shutdown instance %s on"
11361
                   " node %s" % (instance.name, src_node))
11362

    
11363
    # set the disks ID correctly since call_instance_start needs the
11364
    # correct drbd minor to create the symlinks
11365
    for disk in instance.disks:
11366
      self.cfg.SetDiskID(disk, src_node)
11367

    
11368
    activate_disks = (not instance.admin_up)
11369

    
11370
    if activate_disks:
11371
      # Activate the instance disks if we're exporting a stopped instance
11372
      feedback_fn("Activating disks for %s" % instance.name)
11373
      _StartInstanceDisks(self, instance, None)
11374

    
11375
    try:
11376
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11377
                                                     instance)
11378

    
11379
      helper.CreateSnapshots()
11380
      try:
11381
        if (self.op.shutdown and instance.admin_up and
11382
            not self.op.remove_instance):
11383
          assert not activate_disks
11384
          feedback_fn("Starting instance %s" % instance.name)
11385
          result = self.rpc.call_instance_start(src_node, instance,
11386
                                                None, None, False)
11387
          msg = result.fail_msg
11388
          if msg:
11389
            feedback_fn("Failed to start instance: %s" % msg)
11390
            _ShutdownInstanceDisks(self, instance)
11391
            raise errors.OpExecError("Could not start instance: %s" % msg)
11392

    
11393
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11394
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11395
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11396
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11397
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11398

    
11399
          (key_name, _, _) = self.x509_key_name
11400

    
11401
          dest_ca_pem = \
11402
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11403
                                            self.dest_x509_ca)
11404

    
11405
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11406
                                                     key_name, dest_ca_pem,
11407
                                                     timeouts)
11408
      finally:
11409
        helper.Cleanup()
11410

    
11411
      # Check for backwards compatibility
11412
      assert len(dresults) == len(instance.disks)
11413
      assert compat.all(isinstance(i, bool) for i in dresults), \
11414
             "Not all results are boolean: %r" % dresults
11415

    
11416
    finally:
11417
      if activate_disks:
11418
        feedback_fn("Deactivating disks for %s" % instance.name)
11419
        _ShutdownInstanceDisks(self, instance)
11420

    
11421
    if not (compat.all(dresults) and fin_resu):
11422
      failures = []
11423
      if not fin_resu:
11424
        failures.append("export finalization")
11425
      if not compat.all(dresults):
11426
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11427
                               if not dsk)
11428
        failures.append("disk export: disk(s) %s" % fdsk)
11429

    
11430
      raise errors.OpExecError("Export failed, errors in %s" %
11431
                               utils.CommaJoin(failures))
11432

    
11433
    # At this point, the export was successful, we can cleanup/finish
11434

    
11435
    # Remove instance if requested
11436
    if self.op.remove_instance:
11437
      feedback_fn("Removing instance %s" % instance.name)
11438
      _RemoveInstance(self, feedback_fn, instance,
11439
                      self.op.ignore_remove_failures)
11440

    
11441
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11442
      self._CleanupExports(feedback_fn)
11443

    
11444
    return fin_resu, dresults
11445

    
11446

    
11447
class LUBackupRemove(NoHooksLU):
11448
  """Remove exports related to the named instance.
11449

11450
  """
11451
  REQ_BGL = False
11452

    
11453
  def ExpandNames(self):
11454
    self.needed_locks = {}
11455
    # We need all nodes to be locked in order for RemoveExport to work, but we
11456
    # don't need to lock the instance itself, as nothing will happen to it (and
11457
    # we can remove exports also for a removed instance)
11458
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11459

    
11460
  def Exec(self, feedback_fn):
11461
    """Remove any export.
11462

11463
    """
11464
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11465
    # If the instance was not found we'll try with the name that was passed in.
11466
    # This will only work if it was an FQDN, though.
11467
    fqdn_warn = False
11468
    if not instance_name:
11469
      fqdn_warn = True
11470
      instance_name = self.op.instance_name
11471

    
11472
    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
11473
    exportlist = self.rpc.call_export_list(locked_nodes)
11474
    found = False
11475
    for node in exportlist:
11476
      msg = exportlist[node].fail_msg
11477
      if msg:
11478
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11479
        continue
11480
      if instance_name in exportlist[node].payload:
11481
        found = True
11482
        result = self.rpc.call_export_remove(node, instance_name)
11483
        msg = result.fail_msg
11484
        if msg:
11485
          logging.error("Could not remove export for instance %s"
11486
                        " on node %s: %s", instance_name, node, msg)
11487

    
11488
    if fqdn_warn and not found:
11489
      feedback_fn("Export not found. If trying to remove an export belonging"
11490
                  " to a deleted instance please use its Fully Qualified"
11491
                  " Domain Name.")
11492

    
11493

    
11494
class LUGroupAdd(LogicalUnit):
11495
  """Logical unit for creating node groups.
11496

11497
  """
11498
  HPATH = "group-add"
11499
  HTYPE = constants.HTYPE_GROUP
11500
  REQ_BGL = False
11501

    
11502
  def ExpandNames(self):
11503
    # We need the new group's UUID here so that we can create and acquire the
11504
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11505
    # that it should not check whether the UUID exists in the configuration.
11506
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11507
    self.needed_locks = {}
11508
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11509

    
11510
  def CheckPrereq(self):
11511
    """Check prerequisites.
11512

11513
    This checks that the given group name is not an existing node group
11514
    already.
11515

11516
    """
11517
    try:
11518
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11519
    except errors.OpPrereqError:
11520
      pass
11521
    else:
11522
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11523
                                 " node group (UUID: %s)" %
11524
                                 (self.op.group_name, existing_uuid),
11525
                                 errors.ECODE_EXISTS)
11526

    
11527
    if self.op.ndparams:
11528
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11529

    
11530
  def BuildHooksEnv(self):
11531
    """Build hooks env.
11532

11533
    """
11534
    return {
11535
      "GROUP_NAME": self.op.group_name,
11536
      }
11537

    
11538
  def BuildHooksNodes(self):
11539
    """Build hooks nodes.
11540

11541
    """
11542
    mn = self.cfg.GetMasterNode()
11543
    return ([mn], [mn])
11544

    
11545
  def Exec(self, feedback_fn):
11546
    """Add the node group to the cluster.
11547

11548
    """
11549
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11550
                                  uuid=self.group_uuid,
11551
                                  alloc_policy=self.op.alloc_policy,
11552
                                  ndparams=self.op.ndparams)
11553

    
11554
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11555
    del self.remove_locks[locking.LEVEL_NODEGROUP]
11556

    
11557

    
11558
class LUGroupAssignNodes(NoHooksLU):
11559
  """Logical unit for assigning nodes to groups.
11560

11561
  """
11562
  REQ_BGL = False
11563

    
11564
  def ExpandNames(self):
11565
    # These raise errors.OpPrereqError on their own:
11566
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11567
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11568

    
11569
    # We want to lock all the affected nodes and groups. We have readily
11570
    # available the list of nodes, and the *destination* group. To gather the
11571
    # list of "source" groups, we need to fetch node information later on.
11572
    self.needed_locks = {
11573
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11574
      locking.LEVEL_NODE: self.op.nodes,
11575
      }
11576

    
11577
  def DeclareLocks(self, level):
11578
    if level == locking.LEVEL_NODEGROUP:
11579
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11580

    
11581
      # Try to get all affected nodes' groups without having the group or node
11582
      # lock yet. Needs verification later in the code flow.
11583
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11584

    
11585
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11586

    
11587
  def CheckPrereq(self):
11588
    """Check prerequisites.
11589

11590
    """
11591
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11592
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11593
            frozenset(self.op.nodes))
11594

    
11595
    expected_locks = (set([self.group_uuid]) |
11596
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11597
    actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11598
    if actual_locks != expected_locks:
11599
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11600
                               " current groups are '%s', used to be '%s'" %
11601
                               (utils.CommaJoin(expected_locks),
11602
                                utils.CommaJoin(actual_locks)))
11603

    
11604
    self.node_data = self.cfg.GetAllNodesInfo()
11605
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11606
    instance_data = self.cfg.GetAllInstancesInfo()
11607

    
11608
    if self.group is None:
11609
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11610
                               (self.op.group_name, self.group_uuid))
11611

    
11612
    (new_splits, previous_splits) = \
11613
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11614
                                             for node in self.op.nodes],
11615
                                            self.node_data, instance_data)
11616

    
11617
    if new_splits:
11618
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11619

    
11620
      if not self.op.force:
11621
        raise errors.OpExecError("The following instances get split by this"
11622
                                 " change and --force was not given: %s" %
11623
                                 fmt_new_splits)
11624
      else:
11625
        self.LogWarning("This operation will split the following instances: %s",
11626
                        fmt_new_splits)
11627

    
11628
        if previous_splits:
11629
          self.LogWarning("In addition, these already-split instances continue"
11630
                          " to be split across groups: %s",
11631
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
11632

    
11633
  def Exec(self, feedback_fn):
11634
    """Assign nodes to a new group.
11635

11636
    """
11637
    for node in self.op.nodes:
11638
      self.node_data[node].group = self.group_uuid
11639

    
11640
    # FIXME: Depends on side-effects of modifying the result of
11641
    # C{cfg.GetAllNodesInfo}
11642

    
11643
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11644

    
11645
  @staticmethod
11646
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11647
    """Check for split instances after a node assignment.
11648

11649
    This method considers a series of node assignments as an atomic operation,
11650
    and returns information about split instances after applying the set of
11651
    changes.
11652

11653
    In particular, it returns information about newly split instances, and
11654
    instances that were already split, and remain so after the change.
11655

11656
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11657
    considered.
11658

11659
    @type changes: list of (node_name, new_group_uuid) pairs.
11660
    @param changes: list of node assignments to consider.
11661
    @param node_data: a dict with data for all nodes
11662
    @param instance_data: a dict with all instances to consider
11663
    @rtype: a two-tuple
11664
    @return: a list of instances that were previously whole and become split as
      a consequence of this change, and a list of instances that were already
      split and that this change does not fix.
11667

11668
    """
11669
    changed_nodes = dict((node, group) for node, group in changes
11670
                         if node_data[node].group != group)
11671

    
11672
    all_split_instances = set()
11673
    previously_split_instances = set()
11674

    
11675
    def InstanceNodes(instance):
11676
      return [instance.primary_node] + list(instance.secondary_nodes)
11677

    
11678
    for inst in instance_data.values():
11679
      if inst.disk_template not in constants.DTS_INT_MIRROR:
11680
        continue
11681

    
11682
      instance_nodes = InstanceNodes(inst)
11683

    
11684
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
11685
        previously_split_instances.add(inst.name)
11686

    
11687
      if len(set(changed_nodes.get(node, node_data[node].group)
11688
                 for node in instance_nodes)) > 1:
11689
        all_split_instances.add(inst.name)
11690

    
11691
    return (list(all_split_instances - previously_split_instances),
11692
            list(previously_split_instances & all_split_instances))
11693

    
11694

    
11695
class _GroupQuery(_QueryBase):
11696
  FIELDS = query.GROUP_FIELDS
11697

    
11698
  def ExpandNames(self, lu):
11699
    lu.needed_locks = {}
11700

    
11701
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11702
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11703

    
11704
    if not self.names:
11705
      self.wanted = [name_to_uuid[name]
11706
                     for name in utils.NiceSort(name_to_uuid.keys())]
11707
    else:
11708
      # Accept names to be either names or UUIDs.
11709
      missing = []
11710
      self.wanted = []
11711
      all_uuid = frozenset(self._all_groups.keys())
11712

    
11713
      for name in self.names:
11714
        if name in all_uuid:
11715
          self.wanted.append(name)
11716
        elif name in name_to_uuid:
11717
          self.wanted.append(name_to_uuid[name])
11718
        else:
11719
          missing.append(name)
11720

    
11721
      if missing:
11722
        raise errors.OpPrereqError("Some groups do not exist: %s" %
11723
                                   utils.CommaJoin(missing),
11724
                                   errors.ECODE_NOENT)
11725

    
11726
  def DeclareLocks(self, lu, level):
11727
    pass
11728

    
11729
  def _GetQueryData(self, lu):
11730
    """Computes the list of node groups and their attributes.
11731

11732
    """
11733
    do_nodes = query.GQ_NODE in self.requested_data
11734
    do_instances = query.GQ_INST in self.requested_data
11735

    
11736
    group_to_nodes = None
11737
    group_to_instances = None
11738

    
11739
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11740
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11741
    # latter GetAllInstancesInfo() is not enough, for we have to go through
11742
    # instance->node. Hence, we will need to process nodes even if we only need
11743
    # instance information.
11744
    if do_nodes or do_instances:
11745
      all_nodes = lu.cfg.GetAllNodesInfo()
11746
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11747
      node_to_group = {}
11748

    
11749
      for node in all_nodes.values():
11750
        if node.group in group_to_nodes:
11751
          group_to_nodes[node.group].append(node.name)
11752
          node_to_group[node.name] = node.group
11753

    
11754
      if do_instances:
11755
        all_instances = lu.cfg.GetAllInstancesInfo()
11756
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
11757

    
11758
        for instance in all_instances.values():
11759
          node = instance.primary_node
11760
          if node in node_to_group:
11761
            group_to_instances[node_to_group[node]].append(instance.name)
11762

    
11763
        if not do_nodes:
11764
          # Do not pass on node information if it was not requested.
11765
          group_to_nodes = None
11766

    
11767
    return query.GroupQueryData([self._all_groups[uuid]
11768
                                 for uuid in self.wanted],
11769
                                group_to_nodes, group_to_instances)
11770

    
11771

    
11772
class LUGroupQuery(NoHooksLU):
11773
  """Logical unit for querying node groups.
11774

11775
  """
11776
  REQ_BGL = False
11777

    
11778
  def CheckArguments(self):
11779
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11780
                          self.op.output_fields, False)
11781

    
11782
  def ExpandNames(self):
11783
    self.gq.ExpandNames(self)
11784

    
11785
  def Exec(self, feedback_fn):
11786
    return self.gq.OldStyleQuery(self)
11787

    
11788

    
11789
class LUGroupSetParams(LogicalUnit):
11790
  """Modifies the parameters of a node group.
11791

11792
  """
11793
  HPATH = "group-modify"
11794
  HTYPE = constants.HTYPE_GROUP
11795
  REQ_BGL = False
11796

    
11797
  def CheckArguments(self):
11798
    all_changes = [
11799
      self.op.ndparams,
11800
      self.op.alloc_policy,
11801
      ]
11802

    
11803
    if all_changes.count(None) == len(all_changes):
11804
      raise errors.OpPrereqError("Please pass at least one modification",
11805
                                 errors.ECODE_INVAL)
11806

    
11807
  def ExpandNames(self):
11808
    # This raises errors.OpPrereqError on its own:
11809
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11810

    
11811
    self.needed_locks = {
11812
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11813
      }
11814

    
11815
  def CheckPrereq(self):
11816
    """Check prerequisites.
11817

11818
    """
11819
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11820

    
11821
    if self.group is None:
11822
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11823
                               (self.op.group_name, self.group_uuid))
11824

    
11825
    if self.op.ndparams:
11826
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11827
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11828
      self.new_ndparams = new_ndparams
11829

    
11830
  def BuildHooksEnv(self):
11831
    """Build hooks env.
11832

11833
    """
11834
    return {
11835
      "GROUP_NAME": self.op.group_name,
11836
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
11837
      }
11838

    
11839
  def BuildHooksNodes(self):
11840
    """Build hooks nodes.
11841

11842
    """
11843
    mn = self.cfg.GetMasterNode()
11844
    return ([mn], [mn])
11845

    
11846
  def Exec(self, feedback_fn):
11847
    """Modifies the node group.
11848

11849
    """
11850
    result = []
11851

    
11852
    if self.op.ndparams:
11853
      self.group.ndparams = self.new_ndparams
11854
      result.append(("ndparams", str(self.group.ndparams)))
11855

    
11856
    if self.op.alloc_policy:
11857
      self.group.alloc_policy = self.op.alloc_policy
11858

    
11859
    self.cfg.Update(self.group, feedback_fn)
11860
    return result
11861

    
11862

    
11863

    
11864
class LUGroupRemove(LogicalUnit):
11865
  HPATH = "group-remove"
11866
  HTYPE = constants.HTYPE_GROUP
11867
  REQ_BGL = False
11868

    
11869
  def ExpandNames(self):
11870
    # This raises errors.OpPrereqError on its own:
11871
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11872
    self.needed_locks = {
11873
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11874
      }
11875

    
11876
  def CheckPrereq(self):
11877
    """Check prerequisites.
11878

11879
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
11882

11883
    """
11884
    # Verify that the group is empty.
11885
    group_nodes = [node.name
11886
                   for node in self.cfg.GetAllNodesInfo().values()
11887
                   if node.group == self.group_uuid]
11888

    
11889
    if group_nodes:
11890
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
11891
                                 " nodes: %s" %
11892
                                 (self.op.group_name,
11893
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
11894
                                 errors.ECODE_STATE)
11895

    
11896
    # Verify the cluster would not be left group-less.
11897
    if len(self.cfg.GetNodeGroupList()) == 1:
11898
      raise errors.OpPrereqError("Group '%s' is the only group,"
11899
                                 " cannot be removed" %
11900
                                 self.op.group_name,
11901
                                 errors.ECODE_STATE)
11902

    
11903
  def BuildHooksEnv(self):
11904
    """Build hooks env.
11905

11906
    """
11907
    return {
11908
      "GROUP_NAME": self.op.group_name,
11909
      }
11910

    
11911
  def BuildHooksNodes(self):
11912
    """Build hooks nodes.
11913

11914
    """
11915
    mn = self.cfg.GetMasterNode()
11916
    return ([mn], [mn])
11917

    
11918
  def Exec(self, feedback_fn):
11919
    """Remove the node group.
11920

11921
    """
11922
    try:
11923
      self.cfg.RemoveNodeGroup(self.group_uuid)
11924
    except errors.ConfigurationError:
11925
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11926
                               (self.op.group_name, self.group_uuid))
11927

    
11928
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11929

    
11930

    
11931
class LUGroupRename(LogicalUnit):
11932
  HPATH = "group-rename"
11933
  HTYPE = constants.HTYPE_GROUP
11934
  REQ_BGL = False
11935

    
11936
  def ExpandNames(self):
11937
    # This raises errors.OpPrereqError on its own:
11938
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11939

    
11940
    self.needed_locks = {
11941
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11942
      }
11943

    
11944
  def CheckPrereq(self):
11945
    """Check prerequisites.
11946

11947
    Ensures requested new name is not yet used.
11948

11949
    """
11950
    try:
11951
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11952
    except errors.OpPrereqError:
11953
      pass
11954
    else:
11955
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11956
                                 " node group (UUID: %s)" %
11957
                                 (self.op.new_name, new_name_uuid),
11958
                                 errors.ECODE_EXISTS)
11959

    
11960
  def BuildHooksEnv(self):
11961
    """Build hooks env.
11962

11963
    """
11964
    return {
11965
      "OLD_NAME": self.op.group_name,
11966
      "NEW_NAME": self.op.new_name,
11967
      }
11968

    
11969
  def BuildHooksNodes(self):
11970
    """Build hooks nodes.
11971

11972
    """
11973
    mn = self.cfg.GetMasterNode()
11974

    
11975
    all_nodes = self.cfg.GetAllNodesInfo()
11976
    all_nodes.pop(mn, None)
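    # The master was removed from the map above so that the loop below does
    # not add it a second time; it is always part of run_nodes.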
11977

    
11978
    run_nodes = [mn]
11979
    run_nodes.extend(node.name for node in all_nodes.values()
11980
                     if node.group == self.group_uuid)
11981

    
11982
    return (run_nodes, run_nodes)
11983

    
11984
  def Exec(self, feedback_fn):
11985
    """Rename the node group.
11986

11987
    """
11988
    group = self.cfg.GetNodeGroup(self.group_uuid)
11989

    
11990
    if group is None:
11991
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11992
                               (self.op.group_name, self.group_uuid))
11993

    
11994
    group.name = self.op.new_name
11995
    self.cfg.Update(group, feedback_fn)
11996

    
11997
    return self.op.new_name
11998

    
11999

    
12000
class LUGroupEvacuate(LogicalUnit):
12001
  HPATH = "group-evacuate"
12002
  HTYPE = constants.HTYPE_GROUP
12003
  REQ_BGL = False
12004

    
12005
  def ExpandNames(self):
12006
    # This raises errors.OpPrereqError on its own:
12007
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12008

    
12009
    if self.op.target_groups:
12010
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12011
                                  self.op.target_groups)
12012
    else:
12013
      self.req_target_uuids = []
12014

    
12015
    if self.group_uuid in self.req_target_uuids:
12016
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12017
                                 " as a target group (targets are %s)" %
12018
                                 (self.group_uuid,
12019
                                  utils.CommaJoin(self.req_target_uuids)),
12020
                                 errors.ECODE_INVAL)
12021

    
12022
    if not self.op.iallocator:
12023
      # Use default iallocator
12024
      self.op.iallocator = self.cfg.GetDefaultIAllocator()
12025

    
12026
    if not self.op.iallocator:
12027
      raise errors.OpPrereqError("No iallocator was specified, neither in the"
12028
                                 " opcode nor as a cluster-wide default",
12029
                                 errors.ECODE_INVAL)
12030

    
12031
    self.share_locks = _ShareAll()
12032
    self.needed_locks = {
12033
      locking.LEVEL_INSTANCE: [],
12034
      locking.LEVEL_NODEGROUP: [],
12035
      locking.LEVEL_NODE: [],
12036
      }
12037

    
12038
  def DeclareLocks(self, level):
12039
    if level == locking.LEVEL_INSTANCE:
12040
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12041

    
12042
      # Lock instances optimistically, needs verification once node and group
12043
      # locks have been acquired
12044
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12045
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12046

    
12047
    elif level == locking.LEVEL_NODEGROUP:
12048
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12049

    
12050
      if self.req_target_uuids:
12051
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12052

    
12053
        # Lock all groups used by instances optimistically; this requires going
12054
        # via the node before it's locked, requiring verification later on
12055
        lock_groups.update(group_uuid
12056
                           for instance_name in
12057
                             self.glm.list_owned(locking.LEVEL_INSTANCE)
12058
                           for group_uuid in
12059
                             self.cfg.GetInstanceNodeGroups(instance_name))
12060
      else:
12061
        # No target groups, need to lock all of them
12062
        lock_groups = locking.ALL_SET
12063

    
12064
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12065

    
12066
    elif level == locking.LEVEL_NODE:
12067
      # This will only lock the nodes in the group to be evacuated which
12068
      # contain actual instances
12069
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12070
      self._LockInstancesNodes()
12071

    
12072
      # Lock all nodes in group to be evacuated
12073
      assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
    if owned_instances != wanted_instances:
      raise errors.OpPrereqError("Instances in node group to be evacuated (%s)"
                                 " changed since locks were acquired, wanted"
                                 " %s, have %s; retry the operation" %
                                 (self.group_uuid,
                                  utils.CommaJoin(wanted_instances),
                                  utils.CommaJoin(owned_instances)),
                                 errors.ECODE_STATE)

    # Get instance information
    self.instances = dict((name, self.cfg.GetInstanceInfo(name))
                          for name in owned_instances)

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
      if not owned_groups.issuperset(inst_groups):
        raise errors.OpPrereqError("Instance %s's node groups changed since"
                                   " locks were acquired, current groups"
                                   " are '%s', owning groups '%s'; retry the"
                                   " operation" %
                                   (instance_name,
                                    utils.CommaJoin(inst_groups),
                                    utils.CommaJoin(owned_groups)),
                                   errors.ECODE_STATE)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpExecError("There are no possible target groups")

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.glm.list_owned(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
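    # Each entry in "jobs" is one evacuation job for a single instance: a
    # list of opcodes which, per _JOB_LIST in the IAllocator class below,
    # should only contain failover, migrate or replace-disks operations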

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the matching (path, tag) pairs.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
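    # Every tag-bearing object is searched; matches are returned as
    # (path, tag) pairs, e.g. ("/instances/inst1.example.com", "mytag")
    # with illustrative names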
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
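      # e.g. repeat=3 runs the delay three times, logged as iterations
      # 0/2, 1/2 and 2/2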
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0
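  # The notification handshake is two-step: the test client has to connect
  # to the announced socket within _CLIENT_CONNECT_TIMEOUT and then send a
  # single confirmation byte within _CLIENT_CONFIRM_TIMEOUT (see
  # _NotifyUsingSocket below)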

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the per-mode key list in _MODE_DATA are
      required)
    - four buffer attributes (in|out_data|text), which represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
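    # i_list pairs every instance object with its backend parameters as
    # filled from the cluster defaults (memory, vcpus, ...)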

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
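    # e.g. {"f4e06e0f-...": {"name": "default", "alloc_policy": "preferred"}}
    # (illustrative UUID and values)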

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict mapping node names to dicts of config-derived node data

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable-msg=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_MEVAC:
      (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
       ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
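  # Construction sketch with illustrative values:
  #   IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_RELOC,
  #              name="inst1.example.com",
  #              relocate_from=["node2.example.com"])
  # builds the request via _AddRelocateInstance and leaves the serialized
  # input in self.in_text, ready for Run()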

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
                     constants.IALLOCATOR_MODE_MEVAC):
      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      if self.mode == constants.IALLOCATOR_MODE_RELOC:
        assert self.relocate_from is not None
        assert self.required_nodes == 1

        request_groups = fn(self.relocate_from)
        result_groups = fn(rdict["result"])

        if result_groups != request_groups:
          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                   " differ from original groups (%s)" %
                                   (utils.CommaJoin(result_groups),
                                    utils.CommaJoin(request_groups)))
      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
        request_groups = fn(self.evac_nodes)
        for (instance_name, secnode) in self.result:
          result_groups = fn([secnode])
          if result_groups != request_groups:
            raise errors.OpExecError("Iallocator returned new secondary node"
                                     " '%s' (group '%s') for instance '%s'"
                                     " which is not in original group '%s'" %
                                     (secnode, utils.CommaJoin(result_groups),
                                      instance_name,
                                      utils.CommaJoin(request_groups)))
      else:
        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()
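    # Example with illustrative data: node2group={"node1": "uuid-a"},
    # groups={"uuid-a": {"name": "group-a"}}, nodes=["node1", "nodeX"]
    # yields ["group-a"]; unknown nodes are skipped and unknown groups
    # fall back to their UUID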

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
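# e.g. _GetQueryImplementation(constants.QR_NODE) returns the _NodeQuery class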


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)