
root / lib / cmdlib.py @ 6ba2bdc2


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62

    
63
import ganeti.masterd.instance # pylint: disable-msg=W0611
64

    
65

    
66
class ResultWithJobs:
67
  """Data container for LU results with jobs.
68

69
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71
  contained in the C{jobs} attribute and include the job IDs in the opcode
72
  result.
73

74
  """
75
  def __init__(self, jobs, **kwargs):
76
    """Initializes this class.
77

78
    Additional return values can be specified as keyword arguments.
79

80
    @type jobs: list of lists of L{opcodes.OpCode}
81
    @param jobs: A list of lists of opcode objects
82

83
    """
84
    self.jobs = jobs
85
    self.other = kwargs
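# Example (illustrative sketch, not from the original module): an LU's Exec
# method could hand follow-up work back to the job queue like this. Each
# inner list becomes one submitted job; extra keyword arguments end up in
# the "other" dict of the result. The use of opcodes.OpTestDelay and the
# "other_value" key are assumptions made purely for illustration.
def _ExampleResultWithJobs():
  return ResultWithJobs([[opcodes.OpTestDelay(duration=0)]],
                        other_value=42)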
86

    
87

    
88
class LogicalUnit(object):
89
  """Logical Unit base class.
90

91
  Subclasses must follow these rules:
92
    - implement ExpandNames
93
    - implement CheckPrereq (except when tasklets are used)
94
    - implement Exec (except when tasklets are used)
95
    - implement BuildHooksEnv
96
    - implement BuildHooksNodes
97
    - redefine HPATH and HTYPE
98
    - optionally redefine their run requirements:
99
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
100

101
  Note that all commands require root permissions.
102

103
  @ivar dry_run_result: the value (if any) that will be returned to the caller
104
      in dry-run mode (signalled by opcode dry_run parameter)
105

106
  """
107
  HPATH = None
108
  HTYPE = None
109
  REQ_BGL = True
110

    
111
  def __init__(self, processor, op, context, rpc):
112
    """Constructor for LogicalUnit.
113

114
    This needs to be overridden in derived classes in order to check op
115
    validity.
116

117
    """
118
    self.proc = processor
119
    self.op = op
120
    self.cfg = context.cfg
121
    self.glm = context.glm
122
    self.context = context
123
    self.rpc = rpc
124
    # Dicts used to declare locking needs to mcpu
125
    self.needed_locks = None
126
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
127
    self.add_locks = {}
128
    self.remove_locks = {}
129
    # Used to force good behavior when calling helper functions
130
    self.recalculate_locks = {}
131
    # logging
132
    self.Log = processor.Log # pylint: disable-msg=C0103
133
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
134
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
135
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
136
    # support for dry-run
137
    self.dry_run_result = None
138
    # support for generic debug attribute
139
    if (not hasattr(self.op, "debug_level") or
140
        not isinstance(self.op.debug_level, int)):
141
      self.op.debug_level = 0
142

    
143
    # Tasklets
144
    self.tasklets = None
145

    
146
    # Validate opcode parameters and set defaults
147
    self.op.Validate(True)
148

    
149
    self.CheckArguments()
150

    
151
  def CheckArguments(self):
152
    """Check syntactic validity for the opcode arguments.
153

154
    This method is for doing a simple syntactic check to ensure the
155
    validity of opcode parameters, without any cluster-related
156
    checks. While the same can be accomplished in ExpandNames and/or
157
    CheckPrereq, doing these separately is better because:
158

159
      - ExpandNames is left as purely a lock-related function
160
      - CheckPrereq is run after we have acquired locks (and possibly
161
        waited for them)
162

163
    The function is allowed to change the self.op attribute so that
164
    later methods need no longer worry about missing parameters.
165

166
    """
167
    pass
168

    
169
  def ExpandNames(self):
170
    """Expand names for this LU.
171

172
    This method is called before starting to execute the opcode, and it should
173
    update all the parameters of the opcode to their canonical form (e.g. a
174
    short node name must be fully expanded after this method has successfully
175
    completed). This way locking, hooks, logging, etc. can work correctly.
176

177
    LUs which implement this method must also populate the self.needed_locks
178
    member, as a dict with lock levels as keys, and a list of needed lock names
179
    as values. Rules:
180

181
      - use an empty dict if you don't need any lock
182
      - if you don't need any lock at a particular level omit that level
183
      - don't put anything for the BGL level
184
      - if you want all locks at a level use locking.ALL_SET as a value
185

186
    If you need to share locks (rather than acquire them exclusively) at one
187
    level you can modify self.share_locks, setting a true value (usually 1) for
188
    that level. By default locks are not shared.
189

190
    This function can also define a list of tasklets, which then will be
191
    executed in order instead of the usual LU-level CheckPrereq and Exec
192
    functions, if those are not defined by the LU.
193

194
    Examples::
195

196
      # Acquire all nodes and one instance
197
      self.needed_locks = {
198
        locking.LEVEL_NODE: locking.ALL_SET,
199
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
200
      }
201
      # Acquire just two nodes
202
      self.needed_locks = {
203
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
204
      }
205
      # Acquire no locks
206
      self.needed_locks = {} # No, you can't leave it to the default value None
207

208
    """
209
    # The implementation of this method is mandatory only if the new LU is
210
    # concurrent, so that old LUs don't need to be changed all at the same
211
    # time.
212
    if self.REQ_BGL:
213
      self.needed_locks = {} # Exclusive LUs don't need locks.
214
    else:
215
      raise NotImplementedError
216

    
217
  def DeclareLocks(self, level):
218
    """Declare LU locking needs for a level
219

220
    While most LUs can just declare their locking needs at ExpandNames time,
221
    sometimes there's the need to calculate some locks after having acquired
222
    the ones before. This function is called just before acquiring locks at a
223
    particular level, but after acquiring the ones at lower levels, and permits
224
    such calculations. It can be used to modify self.needed_locks, and by
225
    default it does nothing.
226

227
    This function is only called if you have something already set in
228
    self.needed_locks for the level.
229

230
    @param level: Locking level which is going to be locked
231
    @type level: member of ganeti.locking.LEVELS
232

233
    """
234

    
235
  def CheckPrereq(self):
236
    """Check prerequisites for this LU.
237

238
    This method should check that the prerequisites for the execution
239
    of this LU are fulfilled. It can do internode communication, but
240
    it should be idempotent - no cluster or system changes are
241
    allowed.
242

243
    The method should raise errors.OpPrereqError in case something is
244
    not fulfilled. Its return value is ignored.
245

246
    This method should also update all the parameters of the opcode to
247
    their canonical form if it hasn't been done by ExpandNames before.
248

249
    """
250
    if self.tasklets is not None:
251
      for (idx, tl) in enumerate(self.tasklets):
252
        logging.debug("Checking prerequisites for tasklet %s/%s",
253
                      idx + 1, len(self.tasklets))
254
        tl.CheckPrereq()
255
    else:
256
      pass
257

    
258
  def Exec(self, feedback_fn):
259
    """Execute the LU.
260

261
    This method should implement the actual work. It should raise
262
    errors.OpExecError for failures that are somewhat dealt with in
263
    code, or expected.
264

265
    """
266
    if self.tasklets is not None:
267
      for (idx, tl) in enumerate(self.tasklets):
268
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
269
        tl.Exec(feedback_fn)
270
    else:
271
      raise NotImplementedError
272

    
273
  def BuildHooksEnv(self):
274
    """Build hooks environment for this LU.
275

276
    @rtype: dict
277
    @return: Dictionary containing the environment that will be used for
278
      running the hooks for this LU. The keys of the dict must not be prefixed
279
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
280
      will extend the environment with additional variables. If no environment
281
      should be defined, an empty dictionary should be returned (not C{None}).
282
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
283
      will not be called.
284

285
    """
286
    raise NotImplementedError
287

    
288
  def BuildHooksNodes(self):
289
    """Build list of nodes to run LU's hooks.
290

291
    @rtype: tuple; (list, list)
292
    @return: Tuple containing a list of node names on which the hook
293
      should run before the execution and a list of node names on which the
294
      hook should run after the execution. No nodes should be returned as an
295
      empty list (and not None).
296
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
297
      will not be called.
298

299
    """
300
    raise NotImplementedError
301

    
302
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
303
    """Notify the LU about the results of its hooks.
304

305
    This method is called every time a hooks phase is executed, and notifies
306
    the Logical Unit about the hooks' result. The LU can then use it to alter
307
    its result based on the hooks.  By default the method does nothing and the
308
    previous result is passed back unchanged but any LU can define it if it
309
    wants to use the local cluster hook-scripts somehow.
310

311
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
312
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
313
    @param hook_results: the results of the multi-node hooks rpc call
314
    @param feedback_fn: function used to send feedback back to the caller
315
    @param lu_result: the previous Exec result this LU had, or None
316
        in the PRE phase
317
    @return: the new Exec result, based on the previous result
318
        and hook results
319

320
    """
321
    # API must be kept, thus we ignore the "unused argument" and "could
322
    # be a function" warnings
323
    # pylint: disable-msg=W0613,R0201
324
    return lu_result
325

    
326
  def _ExpandAndLockInstance(self):
327
    """Helper function to expand and lock an instance.
328

329
    Many LUs that work on an instance take its name in self.op.instance_name
330
    and need to expand it and then declare the expanded name for locking. This
331
    function does it, and then updates self.op.instance_name to the expanded
332
    name. It also initializes needed_locks as a dict, if this hasn't been done
333
    before.
334

335
    """
336
    if self.needed_locks is None:
337
      self.needed_locks = {}
338
    else:
339
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
340
        "_ExpandAndLockInstance called with instance-level locks set"
341
    self.op.instance_name = _ExpandInstanceName(self.cfg,
342
                                                self.op.instance_name)
343
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
344

    
345
  def _LockInstancesNodes(self, primary_only=False):
346
    """Helper function to declare instances' nodes for locking.
347

348
    This function should be called after locking one or more instances to lock
349
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
350
    with all primary or secondary nodes for instances already locked and
351
    present in self.needed_locks[locking.LEVEL_INSTANCE].
352

353
    It should be called from DeclareLocks, and for safety only works if
354
    self.recalculate_locks[locking.LEVEL_NODE] is set.
355

356
    In the future it may grow parameters to just lock some instance's nodes, or
357
    to just lock primaries or secondary nodes, if needed.
358

359
    It should be called in DeclareLocks in a way similar to::
360

361
      if level == locking.LEVEL_NODE:
362
        self._LockInstancesNodes()
363

364
    @type primary_only: boolean
365
    @param primary_only: only lock primary nodes of locked instances
366

367
    """
368
    assert locking.LEVEL_NODE in self.recalculate_locks, \
369
      "_LockInstancesNodes helper function called with no nodes to recalculate"
370

    
371
    # TODO: check if we've really been called with the instance locks held
372

    
373
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
374
    # future we might want to have different behaviors depending on the value
375
    # of self.recalculate_locks[locking.LEVEL_NODE]
376
    wanted_nodes = []
377
    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
378
      instance = self.context.cfg.GetInstanceInfo(instance_name)
379
      wanted_nodes.append(instance.primary_node)
380
      if not primary_only:
381
        wanted_nodes.extend(instance.secondary_nodes)
382

    
383
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
384
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
385
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
386
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
387

    
388
    del self.recalculate_locks[locking.LEVEL_NODE]
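# Example (illustrative sketch, not an LU from the original module): a
# hypothetical concurrent LU showing the ExpandNames/DeclareLocks pattern
# described above, combining _ExpandAndLockInstance with
# _LockInstancesNodes. The opcode attribute "instance_name" and the LU
# itself are assumptions for illustration only.
class _ExampleInstanceInfoLU(LogicalUnit):
  HPATH = None
  HTYPE = None
  REQ_BGL = False

  def ExpandNames(self):
    # Lock the instance named in the (hypothetical) opcode ...
    self._ExpandAndLockInstance()
    # ... and recalculate its node locks later, in DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    return {}

  def BuildHooksNodes(self):
    return ([], [])

  def CheckPrereq(self):
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

  def Exec(self, feedback_fn):
    feedback_fn("instance %s uses nodes %s" %
                (self.instance.name, list(self.instance.all_nodes)))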
389

    
390

    
391
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
392
  """Simple LU which runs no hooks.
393

394
  This LU is intended as a parent for other LogicalUnits which will
395
  run no hooks, in order to reduce duplicate code.
396

397
  """
398
  HPATH = None
399
  HTYPE = None
400

    
401
  def BuildHooksEnv(self):
402
    """Empty BuildHooksEnv for NoHooksLu.
403

404
    This just raises an error.
405

406
    """
407
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
408

    
409
  def BuildHooksNodes(self):
410
    """Empty BuildHooksNodes for NoHooksLU.
411

412
    """
413
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
414

    
415

    
416
class Tasklet:
417
  """Tasklet base class.
418

419
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
420
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
421
  tasklets know nothing about locks.
422

423
  Subclasses must follow these rules:
424
    - Implement CheckPrereq
425
    - Implement Exec
426

427
  """
428
  def __init__(self, lu):
429
    self.lu = lu
430

    
431
    # Shortcuts
432
    self.cfg = lu.cfg
433
    self.rpc = lu.rpc
434

    
435
  def CheckPrereq(self):
436
    """Check prerequisites for this tasklets.
437

438
    This method should check whether the prerequisites for the execution of
439
    this tasklet are fulfilled. It can do internode communication, but it
440
    should be idempotent - no cluster or system changes are allowed.
441

442
    The method should raise errors.OpPrereqError in case something is not
443
    fulfilled. Its return value is ignored.
444

445
    This method should also update all parameters to their canonical form if it
446
    hasn't been done before.
447

448
    """
449
    pass
450

    
451
  def Exec(self, feedback_fn):
452
    """Execute the tasklet.
453

454
    This method should implement the actual work. It should raise
455
    errors.OpExecError for failures that are somewhat dealt with in code, or
456
    expected.
457

458
    """
459
    raise NotImplementedError
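# Example (illustrative sketch, not from the original module): a minimal
# tasklet; a hypothetical LU would instantiate it in ExpandNames via
# "self.tasklets = [_ExampleNoopTasklet(self)]" instead of implementing
# CheckPrereq/Exec itself.
class _ExampleNoopTasklet(Tasklet):
  def CheckPrereq(self):
    # Only validation is allowed here; no cluster or system changes
    pass

  def Exec(self, feedback_fn):
    feedback_fn("example tasklet: nothing to do")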
460

    
461

    
462
class _QueryBase:
463
  """Base for query utility classes.
464

465
  """
466
  #: Attribute holding field definitions
467
  FIELDS = None
468

    
469
  def __init__(self, filter_, fields, use_locking):
470
    """Initializes this class.
471

472
    """
473
    self.use_locking = use_locking
474

    
475
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
476
                             namefield="name")
477
    self.requested_data = self.query.RequestedData()
478
    self.names = self.query.RequestedNames()
479

    
480
    # Sort only if no names were requested
481
    self.sort_by_name = not self.names
482

    
483
    self.do_locking = None
484
    self.wanted = None
485

    
486
  def _GetNames(self, lu, all_names, lock_level):
487
    """Helper function to determine names asked for in the query.
488

489
    """
490
    if self.do_locking:
491
      names = lu.glm.list_owned(lock_level)
492
    else:
493
      names = all_names
494

    
495
    if self.wanted == locking.ALL_SET:
496
      assert not self.names
497
      # caller didn't specify names, so ordering is not important
498
      return utils.NiceSort(names)
499

    
500
    # caller specified names and we must keep the same order
501
    assert self.names
502
    assert not self.do_locking or lu.glm.is_owned(lock_level)
503

    
504
    missing = set(self.wanted).difference(names)
505
    if missing:
506
      raise errors.OpExecError("Some items were removed before retrieving"
507
                               " their data: %s" % missing)
508

    
509
    # Return expanded names
510
    return self.wanted
511

    
512
  def ExpandNames(self, lu):
513
    """Expand names for this query.
514

515
    See L{LogicalUnit.ExpandNames}.
516

517
    """
518
    raise NotImplementedError()
519

    
520
  def DeclareLocks(self, lu, level):
521
    """Declare locks for this query.
522

523
    See L{LogicalUnit.DeclareLocks}.
524

525
    """
526
    raise NotImplementedError()
527

    
528
  def _GetQueryData(self, lu):
529
    """Collects all data for this query.
530

531
    @return: Query data object
532

533
    """
534
    raise NotImplementedError()
535

    
536
  def NewStyleQuery(self, lu):
537
    """Collect data and execute query.
538

539
    """
540
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
541
                                  sort_by_name=self.sort_by_name)
542

    
543
  def OldStyleQuery(self, lu):
544
    """Collect data and execute query.
545

546
    """
547
    return self.query.OldStyleQuery(self._GetQueryData(lu),
548
                                    sort_by_name=self.sort_by_name)
549

    
550

    
551
def _ShareAll():
552
  """Returns a dict declaring all lock levels shared.
553

554
  """
555
  return dict.fromkeys(locking.LEVELS, 1)
556

    
557

    
558
def _SupportsOob(cfg, node):
559
  """Tells if node supports OOB.
560

561
  @type cfg: L{config.ConfigWriter}
562
  @param cfg: The cluster configuration
563
  @type node: L{objects.Node}
564
  @param node: The node
565
  @return: The OOB script if supported or an empty string otherwise
566

567
  """
568
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
569

    
570

    
571
def _GetWantedNodes(lu, nodes):
572
  """Returns list of checked and expanded node names.
573

574
  @type lu: L{LogicalUnit}
575
  @param lu: the logical unit on whose behalf we execute
576
  @type nodes: list
577
  @param nodes: list of node names or None for all nodes
578
  @rtype: list
579
  @return: the list of nodes, sorted
580
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
581

582
  """
583
  if nodes:
584
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
585

    
586
  return utils.NiceSort(lu.cfg.GetNodeList())
587

    
588

    
589
def _GetWantedInstances(lu, instances):
590
  """Returns list of checked and expanded instance names.
591

592
  @type lu: L{LogicalUnit}
593
  @param lu: the logical unit on whose behalf we execute
594
  @type instances: list
595
  @param instances: list of instance names or None for all instances
596
  @rtype: list
597
  @return: the list of instances, sorted
598
  @raise errors.OpPrereqError: if the instances parameter is wrong type
599
  @raise errors.OpPrereqError: if any of the passed instances is not found
600

601
  """
602
  if instances:
603
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
604
  else:
605
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
606
  return wanted
607

    
608

    
609
def _GetUpdatedParams(old_params, update_dict,
610
                      use_default=True, use_none=False):
611
  """Return the new version of a parameter dictionary.
612

613
  @type old_params: dict
614
  @param old_params: old parameters
615
  @type update_dict: dict
616
  @param update_dict: dict containing new parameter values, or
617
      constants.VALUE_DEFAULT to reset the parameter to its default
618
      value
619
  @param use_default: boolean
620
  @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
621
      values as 'to be deleted' values
622
  @param use_none: boolean
623
  @type use_none: whether to recognise C{None} values as 'to be
624
      deleted' values
625
  @rtype: dict
626
  @return: the new parameter dictionary
627

628
  """
629
  params_copy = copy.deepcopy(old_params)
630
  for key, val in update_dict.iteritems():
631
    if ((use_default and val == constants.VALUE_DEFAULT) or
632
        (use_none and val is None)):
633
      try:
634
        del params_copy[key]
635
      except KeyError:
636
        pass
637
    else:
638
      params_copy[key] = val
639
  return params_copy
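# Example (illustrative sketch, not from the original module): how
# constants.VALUE_DEFAULT resets a key while other values overwrite it.
# The parameter names "memory" and "vcpus" are made up for illustration.
def _ExampleGetUpdatedParams():
  old = {"memory": 128, "vcpus": 1}
  new = _GetUpdatedParams(old, {"memory": constants.VALUE_DEFAULT,
                                "vcpus": 2})
  assert new == {"vcpus": 2}
  return new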
640

    
641

    
642
def _ReleaseLocks(lu, level, names=None, keep=None):
643
  """Releases locks owned by an LU.
644

645
  @type lu: L{LogicalUnit}
646
  @param lu: the logical unit which owns the locks to release
  @param level: Lock level
647
  @type names: list or None
648
  @param names: Names of locks to release
649
  @type keep: list or None
650
  @param keep: Names of locks to retain
651

652
  """
653
  assert not (keep is not None and names is not None), \
654
         "Only one of the 'names' and the 'keep' parameters can be given"
655

    
656
  if names is not None:
657
    should_release = names.__contains__
658
  elif keep:
659
    should_release = lambda name: name not in keep
660
  else:
661
    should_release = None
662

    
663
  if should_release:
664
    retain = []
665
    release = []
666

    
667
    # Determine which locks to release
668
    for name in lu.glm.list_owned(level):
669
      if should_release(name):
670
        release.append(name)
671
      else:
672
        retain.append(name)
673

    
674
    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
675

    
676
    # Release just some locks
677
    lu.glm.release(level, names=release)
678

    
679
    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
680
  else:
681
    # Release everything
682
    lu.glm.release(level)
683

    
684
    assert not lu.glm.is_owned(level), "No locks should be owned"
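# Example (illustrative sketch, not from the original module): after
# narrowing its work down to a single node, a hypothetical LU could drop
# every other node lock it still holds like this.
def _ExampleKeepOnlyOneNodeLock(lu, node_name):
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[node_name])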
685

    
686

    
687
def _MapInstanceDisksToNodes(instances):
688
  """Creates a map from (node, volume) to instance name.
689

690
  @type instances: list of L{objects.Instance}
691
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
692

693
  """
694
  return dict(((node, vol), inst.name)
695
              for inst in instances
696
              for (node, vols) in inst.MapLVsByNode().items()
697
              for vol in vols)
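# Example (illustrative sketch, not from the original module): the shape of
# the mapping, demonstrated with a duck-typed stand-in instead of a real
# L{objects.Instance}; names and volume paths are made up.
def _ExampleMapInstanceDisksToNodes():
  class _FakeInstance:
    name = "inst1.example.com"

    def MapLVsByNode(self):
      return {"node1.example.com": ["xenvg/disk0", "xenvg/disk1"]}

  return _MapInstanceDisksToNodes([_FakeInstance()])
  # -> {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
  #     ("node1.example.com", "xenvg/disk1"): "inst1.example.com"}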
698

    
699

    
700
def _RunPostHook(lu, node_name):
701
  """Runs the post-hook for an opcode on a single node.
702

703
  """
704
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
705
  try:
706
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
707
  except:
708
    # pylint: disable-msg=W0702
709
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
710

    
711

    
712
def _CheckOutputFields(static, dynamic, selected):
713
  """Checks whether all selected fields are valid.
714

715
  @type static: L{utils.FieldSet}
716
  @param static: static fields set
717
  @type dynamic: L{utils.FieldSet}
718
  @param dynamic: dynamic fields set
719

720
  """
721
  f = utils.FieldSet()
722
  f.Extend(static)
723
  f.Extend(dynamic)
724

    
725
  delta = f.NonMatching(selected)
726
  if delta:
727
    raise errors.OpPrereqError("Unknown output fields selected: %s"
728
                               % ",".join(delta), errors.ECODE_INVAL)
729

    
730

    
731
def _CheckGlobalHvParams(params):
732
  """Validates that given hypervisor params are not global ones.
733

734
  This will ensure that instances don't get customised versions of
735
  global params.
736

737
  """
738
  used_globals = constants.HVC_GLOBALS.intersection(params)
739
  if used_globals:
740
    msg = ("The following hypervisor parameters are global and cannot"
741
           " be customized at instance level, please modify them at"
742
           " cluster level: %s" % utils.CommaJoin(used_globals))
743
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
744

    
745

    
746
def _CheckNodeOnline(lu, node, msg=None):
747
  """Ensure that a given node is online.
748

749
  @param lu: the LU on behalf of which we make the check
750
  @param node: the node to check
751
  @param msg: if passed, should be a message to replace the default one
752
  @raise errors.OpPrereqError: if the node is offline
753

754
  """
755
  if msg is None:
756
    msg = "Can't use offline node"
757
  if lu.cfg.GetNodeInfo(node).offline:
758
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
759

    
760

    
761
def _CheckNodeNotDrained(lu, node):
762
  """Ensure that a given node is not drained.
763

764
  @param lu: the LU on behalf of which we make the check
765
  @param node: the node to check
766
  @raise errors.OpPrereqError: if the node is drained
767

768
  """
769
  if lu.cfg.GetNodeInfo(node).drained:
770
    raise errors.OpPrereqError("Can't use drained node %s" % node,
771
                               errors.ECODE_STATE)
772

    
773

    
774
def _CheckNodeVmCapable(lu, node):
775
  """Ensure that a given node is vm capable.
776

777
  @param lu: the LU on behalf of which we make the check
778
  @param node: the node to check
779
  @raise errors.OpPrereqError: if the node is not vm capable
780

781
  """
782
  if not lu.cfg.GetNodeInfo(node).vm_capable:
783
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
784
                               errors.ECODE_STATE)
785

    
786

    
787
def _CheckNodeHasOS(lu, node, os_name, force_variant):
788
  """Ensure that a node supports a given OS.
789

790
  @param lu: the LU on behalf of which we make the check
791
  @param node: the node to check
792
  @param os_name: the OS to query about
793
  @param force_variant: whether to ignore variant errors
794
  @raise errors.OpPrereqError: if the node does not support the OS
795

796
  """
797
  result = lu.rpc.call_os_get(node, os_name)
798
  result.Raise("OS '%s' not in supported OS list for node %s" %
799
               (os_name, node),
800
               prereq=True, ecode=errors.ECODE_INVAL)
801
  if not force_variant:
802
    _CheckOSVariant(result.payload, os_name)
803

    
804

    
805
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
806
  """Ensure that a node has the given secondary ip.
807

808
  @type lu: L{LogicalUnit}
809
  @param lu: the LU on behalf of which we make the check
810
  @type node: string
811
  @param node: the node to check
812
  @type secondary_ip: string
813
  @param secondary_ip: the ip to check
814
  @type prereq: boolean
815
  @param prereq: whether to throw a prerequisite or an execute error
816
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
817
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
818

819
  """
820
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
821
  result.Raise("Failure checking secondary ip on node %s" % node,
822
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
823
  if not result.payload:
824
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
825
           " please fix and re-run this command" % secondary_ip)
826
    if prereq:
827
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
828
    else:
829
      raise errors.OpExecError(msg)
830

    
831

    
832
def _GetClusterDomainSecret():
833
  """Reads the cluster domain secret.
834

835
  """
836
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
837
                               strict=True)
838

    
839

    
840
def _CheckInstanceDown(lu, instance, reason):
841
  """Ensure that an instance is not running."""
842
  if instance.admin_up:
843
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
844
                               (instance.name, reason), errors.ECODE_STATE)
845

    
846
  pnode = instance.primary_node
847
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
848
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
849
              prereq=True, ecode=errors.ECODE_ENVIRON)
850

    
851
  if instance.name in ins_l.payload:
852
    raise errors.OpPrereqError("Instance %s is running, %s" %
853
                               (instance.name, reason), errors.ECODE_STATE)
854

    
855

    
856
def _ExpandItemName(fn, name, kind):
857
  """Expand an item name.
858

859
  @param fn: the function to use for expansion
860
  @param name: requested item name
861
  @param kind: text description ('Node' or 'Instance')
862
  @return: the resolved (full) name
863
  @raise errors.OpPrereqError: if the item is not found
864

865
  """
866
  full_name = fn(name)
867
  if full_name is None:
868
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
869
                               errors.ECODE_NOENT)
870
  return full_name
871

    
872

    
873
def _ExpandNodeName(cfg, name):
874
  """Wrapper over L{_ExpandItemName} for nodes."""
875
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
876

    
877

    
878
def _ExpandInstanceName(cfg, name):
879
  """Wrapper over L{_ExpandItemName} for instance."""
880
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
881

    
882

    
883
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
884
                          memory, vcpus, nics, disk_template, disks,
885
                          bep, hvp, hypervisor_name, tags):
886
  """Builds instance related env variables for hooks
887

888
  This builds the hook environment from individual variables.
889

890
  @type name: string
891
  @param name: the name of the instance
892
  @type primary_node: string
893
  @param primary_node: the name of the instance's primary node
894
  @type secondary_nodes: list
895
  @param secondary_nodes: list of secondary nodes as strings
896
  @type os_type: string
897
  @param os_type: the name of the instance's OS
898
  @type status: boolean
899
  @param status: the should_run status of the instance
900
  @type memory: string
901
  @param memory: the memory size of the instance
902
  @type vcpus: string
903
  @param vcpus: the count of VCPUs the instance has
904
  @type nics: list
905
  @param nics: list of tuples (ip, mac, mode, link) representing
906
      the NICs the instance has
907
  @type disk_template: string
908
  @param disk_template: the disk template of the instance
909
  @type disks: list
910
  @param disks: the list of (size, mode) pairs
911
  @type bep: dict
912
  @param bep: the backend parameters for the instance
913
  @type hvp: dict
914
  @param hvp: the hypervisor parameters for the instance
915
  @type hypervisor_name: string
916
  @param hypervisor_name: the hypervisor for the instance
917
  @type tags: list
918
  @param tags: list of instance tags as strings
919
  @rtype: dict
920
  @return: the hook environment for this instance
921

922
  """
923
  if status:
924
    str_status = "up"
925
  else:
926
    str_status = "down"
927
  env = {
928
    "OP_TARGET": name,
929
    "INSTANCE_NAME": name,
930
    "INSTANCE_PRIMARY": primary_node,
931
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
932
    "INSTANCE_OS_TYPE": os_type,
933
    "INSTANCE_STATUS": str_status,
934
    "INSTANCE_MEMORY": memory,
935
    "INSTANCE_VCPUS": vcpus,
936
    "INSTANCE_DISK_TEMPLATE": disk_template,
937
    "INSTANCE_HYPERVISOR": hypervisor_name,
938
  }
939

    
940
  if nics:
941
    nic_count = len(nics)
942
    for idx, (ip, mac, mode, link) in enumerate(nics):
943
      if ip is None:
944
        ip = ""
945
      env["INSTANCE_NIC%d_IP" % idx] = ip
946
      env["INSTANCE_NIC%d_MAC" % idx] = mac
947
      env["INSTANCE_NIC%d_MODE" % idx] = mode
948
      env["INSTANCE_NIC%d_LINK" % idx] = link
949
      if mode == constants.NIC_MODE_BRIDGED:
950
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
951
  else:
952
    nic_count = 0
953

    
954
  env["INSTANCE_NIC_COUNT"] = nic_count
955

    
956
  if disks:
957
    disk_count = len(disks)
958
    for idx, (size, mode) in enumerate(disks):
959
      env["INSTANCE_DISK%d_SIZE" % idx] = size
960
      env["INSTANCE_DISK%d_MODE" % idx] = mode
961
  else:
962
    disk_count = 0
963

    
964
  env["INSTANCE_DISK_COUNT"] = disk_count
965

    
966
  if not tags:
967
    tags = []
968

    
969
  env["INSTANCE_TAGS"] = " ".join(tags)
970

    
971
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
972
    for key, value in source.items():
973
      env["INSTANCE_%s_%s" % (kind, key)] = value
974

    
975
  return env
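# Example (illustrative sketch, not from the original module): building the
# hook environment for a made-up single-NIC, single-disk instance; only a
# few of the resulting keys are spelled out in the comments.
def _ExampleInstanceHookEnv():
  env = _BuildInstanceHookEnv("inst1.example.com", "node1.example.com",
                              ["node2.example.com"], "debootstrap+default",
                              True, 128, 1,
                              [(None, "aa:00:00:00:00:01",
                                constants.NIC_MODE_BRIDGED, "xen-br0")],
                              constants.DT_DRBD8, [(1024, "rw")],
                              {}, {}, "xen-pvm", ["web"])
  # env["INSTANCE_NAME"] == "inst1.example.com"
  # env["INSTANCE_NIC0_BRIDGE"] == "xen-br0"
  # env["INSTANCE_DISK_COUNT"] == 1
  return env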
976

    
977

    
978
def _NICListToTuple(lu, nics):
979
  """Build a list of nic information tuples.
980

981
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
982
  value in LUInstanceQueryData.
983

984
  @type lu:  L{LogicalUnit}
985
  @param lu: the logical unit on whose behalf we execute
986
  @type nics: list of L{objects.NIC}
987
  @param nics: list of nics to convert to hooks tuples
988

989
  """
990
  hooks_nics = []
991
  cluster = lu.cfg.GetClusterInfo()
992
  for nic in nics:
993
    ip = nic.ip
994
    mac = nic.mac
995
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
996
    mode = filled_params[constants.NIC_MODE]
997
    link = filled_params[constants.NIC_LINK]
998
    hooks_nics.append((ip, mac, mode, link))
999
  return hooks_nics
1000

    
1001

    
1002
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1003
  """Builds instance related env variables for hooks from an object.
1004

1005
  @type lu: L{LogicalUnit}
1006
  @param lu: the logical unit on whose behalf we execute
1007
  @type instance: L{objects.Instance}
1008
  @param instance: the instance for which we should build the
1009
      environment
1010
  @type override: dict
1011
  @param override: dictionary with key/values that will override
1012
      our values
1013
  @rtype: dict
1014
  @return: the hook environment dictionary
1015

1016
  """
1017
  cluster = lu.cfg.GetClusterInfo()
1018
  bep = cluster.FillBE(instance)
1019
  hvp = cluster.FillHV(instance)
1020
  args = {
1021
    "name": instance.name,
1022
    "primary_node": instance.primary_node,
1023
    "secondary_nodes": instance.secondary_nodes,
1024
    "os_type": instance.os,
1025
    "status": instance.admin_up,
1026
    "memory": bep[constants.BE_MEMORY],
1027
    "vcpus": bep[constants.BE_VCPUS],
1028
    "nics": _NICListToTuple(lu, instance.nics),
1029
    "disk_template": instance.disk_template,
1030
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1031
    "bep": bep,
1032
    "hvp": hvp,
1033
    "hypervisor_name": instance.hypervisor,
1034
    "tags": instance.tags,
1035
  }
1036
  if override:
1037
    args.update(override)
1038
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1039

    
1040

    
1041
def _AdjustCandidatePool(lu, exceptions):
1042
  """Adjust the candidate pool after node operations.
1043

1044
  """
1045
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1046
  if mod_list:
1047
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1048
               utils.CommaJoin(node.name for node in mod_list))
1049
    for name in mod_list:
1050
      lu.context.ReaddNode(name)
1051
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1052
  if mc_now > mc_max:
1053
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1054
               (mc_now, mc_max))
1055

    
1056

    
1057
def _DecideSelfPromotion(lu, exceptions=None):
1058
  """Decide whether I should promote myself as a master candidate.
1059

1060
  """
1061
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1062
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1063
  # the new node will increase mc_max with one, so:
1064
  mc_should = min(mc_should + 1, cp_size)
1065
  return mc_now < mc_should
1066

    
1067

    
1068
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1069
  """Check that the brigdes needed by a list of nics exist.
1070

1071
  """
1072
  cluster = lu.cfg.GetClusterInfo()
1073
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1074
  brlist = [params[constants.NIC_LINK] for params in paramslist
1075
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1076
  if brlist:
1077
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1078
    result.Raise("Error checking bridges on destination node '%s'" %
1079
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1080

    
1081

    
1082
def _CheckInstanceBridgesExist(lu, instance, node=None):
1083
  """Check that the brigdes needed by an instance exist.
1084

1085
  """
1086
  if node is None:
1087
    node = instance.primary_node
1088
  _CheckNicsBridgesExist(lu, instance.nics, node)
1089

    
1090

    
1091
def _CheckOSVariant(os_obj, name):
1092
  """Check whether an OS name conforms to the os variants specification.
1093

1094
  @type os_obj: L{objects.OS}
1095
  @param os_obj: OS object to check
1096
  @type name: string
1097
  @param name: OS name passed by the user, to check for validity
1098

1099
  """
1100
  variant = objects.OS.GetVariant(name)
1101
  if not os_obj.supported_variants:
1102
    if variant:
1103
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1104
                                 " passed)" % (os_obj.name, variant),
1105
                                 errors.ECODE_INVAL)
1106
    return
1107
  if not variant:
1108
    raise errors.OpPrereqError("OS name must include a variant",
1109
                               errors.ECODE_INVAL)
1110

    
1111
  if variant not in os_obj.supported_variants:
1112
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1113

    
1114

    
1115
def _GetNodeInstancesInner(cfg, fn):
1116
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1117

    
1118

    
1119
def _GetNodeInstances(cfg, node_name):
1120
  """Returns a list of all primary and secondary instances on a node.
1121

1122
  """
1123

    
1124
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1125

    
1126

    
1127
def _GetNodePrimaryInstances(cfg, node_name):
1128
  """Returns primary instances on a node.
1129

1130
  """
1131
  return _GetNodeInstancesInner(cfg,
1132
                                lambda inst: node_name == inst.primary_node)
1133

    
1134

    
1135
def _GetNodeSecondaryInstances(cfg, node_name):
1136
  """Returns secondary instances on a node.
1137

1138
  """
1139
  return _GetNodeInstancesInner(cfg,
1140
                                lambda inst: node_name in inst.secondary_nodes)
1141

    
1142

    
1143
def _GetStorageTypeArgs(cfg, storage_type):
1144
  """Returns the arguments for a storage type.
1145

1146
  """
1147
  # Special case for file storage
1148
  if storage_type == constants.ST_FILE:
1149
    # storage.FileStorage wants a list of storage directories
1150
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1151

    
1152
  return []
1153

    
1154

    
1155
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1156
  faulty = []
1157

    
1158
  for dev in instance.disks:
1159
    cfg.SetDiskID(dev, node_name)
1160

    
1161
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1162
  result.Raise("Failed to get disk status from node %s" % node_name,
1163
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1164

    
1165
  for idx, bdev_status in enumerate(result.payload):
1166
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1167
      faulty.append(idx)
1168

    
1169
  return faulty
1170

    
1171

    
1172
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1173
  """Check the sanity of iallocator and node arguments and use the
1174
  cluster-wide iallocator if appropriate.
1175

1176
  Check that at most one of (iallocator, node) is specified. If none is
1177
  specified, then the LU's opcode's iallocator slot is filled with the
1178
  cluster-wide default iallocator.
1179

1180
  @type iallocator_slot: string
1181
  @param iallocator_slot: the name of the opcode iallocator slot
1182
  @type node_slot: string
1183
  @param node_slot: the name of the opcode target node slot
1184

1185
  """
1186
  node = getattr(lu.op, node_slot, None)
1187
  iallocator = getattr(lu.op, iallocator_slot, None)
1188

    
1189
  if node is not None and iallocator is not None:
1190
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1191
                               errors.ECODE_INVAL)
1192
  elif node is None and iallocator is None:
1193
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1194
    if default_iallocator:
1195
      setattr(lu.op, iallocator_slot, default_iallocator)
1196
    else:
1197
      raise errors.OpPrereqError("No iallocator or node given and no"
1198
                                 " cluster-wide default iallocator found;"
1199
                                 " please specify either an iallocator or a"
1200
                                 " node, or set a cluster-wide default"
1201
                                 " iallocator")
1202

    
1203

    
1204
class LUClusterPostInit(LogicalUnit):
1205
  """Logical unit for running hooks after cluster initialization.
1206

1207
  """
1208
  HPATH = "cluster-init"
1209
  HTYPE = constants.HTYPE_CLUSTER
1210

    
1211
  def BuildHooksEnv(self):
1212
    """Build hooks env.
1213

1214
    """
1215
    return {
1216
      "OP_TARGET": self.cfg.GetClusterName(),
1217
      }
1218

    
1219
  def BuildHooksNodes(self):
1220
    """Build hooks nodes.
1221

1222
    """
1223
    return ([], [self.cfg.GetMasterNode()])
1224

    
1225
  def Exec(self, feedback_fn):
1226
    """Nothing to do.
1227

1228
    """
1229
    return True
1230

    
1231

    
1232
class LUClusterDestroy(LogicalUnit):
1233
  """Logical unit for destroying the cluster.
1234

1235
  """
1236
  HPATH = "cluster-destroy"
1237
  HTYPE = constants.HTYPE_CLUSTER
1238

    
1239
  def BuildHooksEnv(self):
1240
    """Build hooks env.
1241

1242
    """
1243
    return {
1244
      "OP_TARGET": self.cfg.GetClusterName(),
1245
      }
1246

    
1247
  def BuildHooksNodes(self):
1248
    """Build hooks nodes.
1249

1250
    """
1251
    return ([], [])
1252

    
1253
  def CheckPrereq(self):
1254
    """Check prerequisites.
1255

1256
    This checks whether the cluster is empty.
1257

1258
    Any errors are signaled by raising errors.OpPrereqError.
1259

1260
    """
1261
    master = self.cfg.GetMasterNode()
1262

    
1263
    nodelist = self.cfg.GetNodeList()
1264
    if len(nodelist) != 1 or nodelist[0] != master:
1265
      raise errors.OpPrereqError("There are still %d node(s) in"
1266
                                 " this cluster." % (len(nodelist) - 1),
1267
                                 errors.ECODE_INVAL)
1268
    instancelist = self.cfg.GetInstanceList()
1269
    if instancelist:
1270
      raise errors.OpPrereqError("There are still %d instance(s) in"
1271
                                 " this cluster." % len(instancelist),
1272
                                 errors.ECODE_INVAL)
1273

    
1274
  def Exec(self, feedback_fn):
1275
    """Destroys the cluster.
1276

1277
    """
1278
    master = self.cfg.GetMasterNode()
1279

    
1280
    # Run post hooks on master node before it's removed
1281
    _RunPostHook(self, master)
1282

    
1283
    result = self.rpc.call_node_stop_master(master, False)
1284
    result.Raise("Could not disable the master role")
1285

    
1286
    return master
1287

    
1288

    
1289
def _VerifyCertificate(filename):
1290
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1291

1292
  @type filename: string
1293
  @param filename: Path to PEM file
1294

1295
  """
1296
  try:
1297
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1298
                                           utils.ReadFile(filename))
1299
  except Exception, err: # pylint: disable-msg=W0703
1300
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1301
            "Failed to load X509 certificate %s: %s" % (filename, err))
1302

    
1303
  (errcode, msg) = \
1304
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1305
                                constants.SSL_CERT_EXPIRATION_ERROR)
1306

    
1307
  if msg:
1308
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1309
  else:
1310
    fnamemsg = None
1311

    
1312
  if errcode is None:
1313
    return (None, fnamemsg)
1314
  elif errcode == utils.CERT_WARNING:
1315
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1316
  elif errcode == utils.CERT_ERROR:
1317
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1318

    
1319
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1320

    
1321

    
1322
def _GetAllHypervisorParameters(cluster, instances):
1323
  """Compute the set of all hypervisor parameters.
1324

1325
  @type cluster: L{objects.Cluster}
1326
  @param cluster: the cluster object
1327
  @param instances: list of L{objects.Instance}
1328
  @param instances: additional instances from which to obtain parameters
1329
  @rtype: list of (origin, hypervisor, parameters)
1330
  @return: a list with all parameters found, indicating the hypervisor they
1331
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1332

1333
  """
1334
  hvp_data = []
1335

    
1336
  for hv_name in cluster.enabled_hypervisors:
1337
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1338

    
1339
  for os_name, os_hvp in cluster.os_hvp.items():
1340
    for hv_name, hv_params in os_hvp.items():
1341
      if hv_params:
1342
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1343
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1344

    
1345
  # TODO: collapse identical parameter values in a single one
1346
  for instance in instances:
1347
    if instance.hvparams:
1348
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1349
                       cluster.FillHV(instance)))
1350

    
1351
  return hvp_data
1352

    
1353

    
1354
class _VerifyErrors(object):
1355
  """Mix-in for cluster/group verify LUs.
1356

1357
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1358
  self.op and self._feedback_fn to be available.)
1359

1360
  """
1361
  TCLUSTER = "cluster"
1362
  TNODE = "node"
1363
  TINSTANCE = "instance"
1364

    
1365
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1366
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1367
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1368
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1369
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1370
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1371
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1372
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1373
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1374
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1375
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1376
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1377
  ENODEDRBD = (TNODE, "ENODEDRBD")
1378
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1379
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1380
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1381
  ENODEHV = (TNODE, "ENODEHV")
1382
  ENODELVM = (TNODE, "ENODELVM")
1383
  ENODEN1 = (TNODE, "ENODEN1")
1384
  ENODENET = (TNODE, "ENODENET")
1385
  ENODEOS = (TNODE, "ENODEOS")
1386
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1387
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1388
  ENODERPC = (TNODE, "ENODERPC")
1389
  ENODESSH = (TNODE, "ENODESSH")
1390
  ENODEVERSION = (TNODE, "ENODEVERSION")
1391
  ENODESETUP = (TNODE, "ENODESETUP")
1392
  ENODETIME = (TNODE, "ENODETIME")
1393
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1394

    
1395
  ETYPE_FIELD = "code"
1396
  ETYPE_ERROR = "ERROR"
1397
  ETYPE_WARNING = "WARNING"
1398

    
1399
  def _Error(self, ecode, item, msg, *args, **kwargs):
1400
    """Format an error message.
1401

1402
    Based on the opcode's error_codes parameter, either format a
1403
    parseable error code, or a simpler error string.
1404

1405
    This must be called only from Exec and functions called from Exec.
1406

1407
    """
1408
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1409
    itype, etxt = ecode
1410
    # first complete the msg
1411
    if args:
1412
      msg = msg % args
1413
    # then format the whole message
1414
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1415
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1416
    else:
1417
      if item:
1418
        item = " " + item
1419
      else:
1420
        item = ""
1421
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1422
    # and finally report it via the feedback_fn
1423
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1424

    
1425
  def _ErrorIf(self, cond, *args, **kwargs):
1426
    """Log an error message if the passed condition is True.
1427

1428
    """
1429
    cond = (bool(cond)
1430
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1431
    if cond:
1432
      self._Error(*args, **kwargs)
1433
    # do not mark the operation as failed for WARN cases only
1434
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1435
      self.bad = self.bad or cond
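# Example (illustrative sketch, not from the original module): the two
# message formats produced by _VerifyErrors._Error, driven by a dummy
# opcode-like object and a plain list used as the feedback sink.
def _ExampleVerifyErrorFormats():
  class _DummyOp:
    error_codes = True
    debug_simulate_errors = False

  class _Demo(_VerifyErrors):
    def __init__(self):
      self.op = _DummyOp()
      self.bad = False
      self.messages = []
      self._feedback_fn = self.messages.append

  demo = _Demo()
  demo._Error(demo.ENODENET, "node1.example.com",
              "ping to node %s failed", "node2.example.com")
  # With error_codes=True the message is machine-parseable, e.g.
  #   "  - ERROR:ENODENET:node:node1.example.com:ping to node
  #    node2.example.com failed"
  # With error_codes=False it would read
  #   "  - ERROR: node node1.example.com: ping to node
  #    node2.example.com failed"
  return demo.messages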
1436

    
1437

    
1438
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1439
  """Verifies the cluster config.
1440

1441
  """
1442
  REQ_BGL = True
1443

    
1444
  def _VerifyHVP(self, hvp_data):
1445
    """Verifies locally the syntax of the hypervisor parameters.
1446

1447
    """
1448
    for item, hv_name, hv_params in hvp_data:
1449
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1450
             (item, hv_name))
1451
      try:
1452
        hv_class = hypervisor.GetHypervisor(hv_name)
1453
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1454
        hv_class.CheckParameterSyntax(hv_params)
1455
      except errors.GenericError, err:
1456
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1457

    
1458
  def ExpandNames(self):
1459
    # Information can be safely retrieved as the BGL is acquired in exclusive
1460
    # mode
1461
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1462
    self.all_node_info = self.cfg.GetAllNodesInfo()
1463
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1464
    self.needed_locks = {}
1465

    
1466
  def Exec(self, feedback_fn):
1467
    """Verify integrity of cluster, performing various test on nodes.
1468

1469
    """
1470
    self.bad = False
1471
    self._feedback_fn = feedback_fn
1472

    
1473
    feedback_fn("* Verifying cluster config")
1474

    
1475
    for msg in self.cfg.VerifyConfig():
1476
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1477

    
1478
    feedback_fn("* Verifying cluster certificate files")
1479

    
1480
    for cert_filename in constants.ALL_CERT_FILES:
1481
      (errcode, msg) = _VerifyCertificate(cert_filename)
1482
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1483

    
1484
    feedback_fn("* Verifying hypervisor parameters")
1485

    
1486
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1487
                                                self.all_inst_info.values()))
1488

    
1489
    feedback_fn("* Verifying all nodes belong to an existing group")
1490

    
1491
    # We do this verification here because, should this bogus circumstance
1492
    # occur, it would never be caught by VerifyGroup, which only acts on
1493
    # nodes/instances reachable from existing node groups.
1494

    
1495
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1496
                         if node.group not in self.all_group_info)
1497

    
1498
    dangling_instances = {}
1499
    no_node_instances = []
1500

    
1501
    for inst in self.all_inst_info.values():
1502
      if inst.primary_node in dangling_nodes:
1503
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1504
      elif inst.primary_node not in self.all_node_info:
1505
        no_node_instances.append(inst.name)
1506

    
1507
    pretty_dangling = [
1508
        "%s (%s)" %
1509
        (node.name,
1510
         utils.CommaJoin(dangling_instances.get(node.name,
1511
                                                ["no instances"])))
1512
        for node in dangling_nodes]
1513

    
1514
    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1515
                  "the following nodes (and their instances) belong to a non"
1516
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1517

    
1518
    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1519
                  "the following instances have a non-existing primary-node:"
1520
                  " %s", utils.CommaJoin(no_node_instances))
1521

    
1522
    return (not self.bad, [g.name for g in self.all_group_info.values()])
1523

    
1524

    
1525
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1526
  """Verifies the status of a node group.
1527

1528
  """
1529
  HPATH = "cluster-verify"
1530
  HTYPE = constants.HTYPE_CLUSTER
1531
  REQ_BGL = False
1532

    
1533
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1534

    
1535
  class NodeImage(object):
1536
    """A class representing the logical and physical status of a node.
1537

1538
    @type name: string
1539
    @ivar name: the node name to which this object refers
1540
    @ivar volumes: a structure as returned from
1541
        L{ganeti.backend.GetVolumeList} (runtime)
1542
    @ivar instances: a list of running instances (runtime)
1543
    @ivar pinst: list of configured primary instances (config)
1544
    @ivar sinst: list of configured secondary instances (config)
1545
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1546
        instances for which this node is secondary (config)
1547
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1548
    @ivar dfree: free disk, as reported by the node (runtime)
1549
    @ivar offline: the offline status (config)
1550
    @type rpc_fail: boolean
1551
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1552
        not whether the individual keys were correct) (runtime)
1553
    @type lvm_fail: boolean
1554
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1555
    @type hyp_fail: boolean
1556
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1557
    @type ghost: boolean
1558
    @ivar ghost: whether this is a known node or not (config)
1559
    @type os_fail: boolean
1560
    @ivar os_fail: whether the RPC call didn't return valid OS data
1561
    @type oslist: list
1562
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1563
    @type vm_capable: boolean
1564
    @ivar vm_capable: whether the node can host instances
1565

1566
    """
1567
    def __init__(self, offline=False, name=None, vm_capable=True):
1568
      self.name = name
1569
      self.volumes = {}
1570
      self.instances = []
1571
      self.pinst = []
1572
      self.sinst = []
1573
      self.sbp = {}
1574
      self.mfree = 0
1575
      self.dfree = 0
1576
      self.offline = offline
1577
      self.vm_capable = vm_capable
1578
      self.rpc_fail = False
1579
      self.lvm_fail = False
1580
      self.hyp_fail = False
1581
      self.ghost = False
1582
      self.os_fail = False
1583
      self.oslist = {}
1584

    
1585
  def ExpandNames(self):
1586
    # This raises errors.OpPrereqError on its own:
1587
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1588

    
1589
    # Get instances in node group; this is unsafe and needs verification later
1590
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1591

    
1592
    self.needed_locks = {
1593
      locking.LEVEL_INSTANCE: inst_names,
1594
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1595
      locking.LEVEL_NODE: [],
1596
      }
1597

    
1598
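    # All lock levels are acquired in shared mode: group verification only
    # reads configuration and node state; it never modifies them.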
    self.share_locks = _ShareAll()
1599

    
1600
  def DeclareLocks(self, level):
1601
    if level == locking.LEVEL_NODE:
1602
      # Get members of node group; this is unsafe and needs verification later
1603
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1604

    
1605
      all_inst_info = self.cfg.GetAllInstancesInfo()
1606

    
1607
      # In Exec(), we warn about mirrored instances that have primary and
1608
      # secondary living in separate node groups. To fully verify that
1609
      # volumes for these instances are healthy, we will need to do an
1610
      # extra call to their secondaries. We ensure here those nodes will
1611
      # be locked.
1612
      for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
1613
        # Important: access only the instances whose lock is owned
1614
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1615
          nodes.update(all_inst_info[inst].secondary_nodes)
1616

    
1617
      self.needed_locks[locking.LEVEL_NODE] = nodes
1618

    
1619
  def CheckPrereq(self):
1620
    group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1621
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1622

    
1623
    unlocked_nodes = \
1624
        group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1625

    
1626
    unlocked_instances = \
1627
        group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))
1628

    
1629
    if unlocked_nodes:
1630
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1631
                                 utils.CommaJoin(unlocked_nodes))
1632

    
1633
    if unlocked_instances:
1634
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1635
                                 utils.CommaJoin(unlocked_instances))
1636

    
1637
    self.all_node_info = self.cfg.GetAllNodesInfo()
1638
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1639

    
1640
    self.my_node_names = utils.NiceSort(group_nodes)
1641
    self.my_inst_names = utils.NiceSort(group_instances)
1642

    
1643
    self.my_node_info = dict((name, self.all_node_info[name])
1644
                             for name in self.my_node_names)
1645

    
1646
    self.my_inst_info = dict((name, self.all_inst_info[name])
1647
                             for name in self.my_inst_names)
1648

    
1649
    # We detect here the nodes that will need the extra RPC calls for verifying
1650
    # split LV volumes; they should be locked.
1651
    extra_lv_nodes = set()
1652

    
1653
    for inst in self.my_inst_info.values():
1654
      if inst.disk_template in constants.DTS_INT_MIRROR:
1655
        group = self.my_node_info[inst.primary_node].group
1656
        for nname in inst.secondary_nodes:
1657
          if self.all_node_info[nname].group != group:
1658
            extra_lv_nodes.add(nname)
1659

    
1660
    unlocked_lv_nodes = \
1661
        extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1662

    
1663
    if unlocked_lv_nodes:
1664
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1665
                                 utils.CommaJoin(unlocked_lv_nodes))
1666
    self.extra_lv_nodes = list(extra_lv_nodes)
1667

    
1668
  def _VerifyNode(self, ninfo, nresult):
1669
    """Perform some basic validation on data returned from a node.
1670

1671
      - check the result data structure is well formed and has all the
1672
        mandatory fields
1673
      - check ganeti version
1674

1675
    @type ninfo: L{objects.Node}
1676
    @param ninfo: the node to check
1677
    @param nresult: the results from the node
1678
    @rtype: boolean
1679
    @return: whether overall this call was successful (and we can expect
1680
         reasonable values in the response)
1681

1682
    """
1683
    node = ninfo.name
1684
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1685

    
1686
    # main result, nresult should be a non-empty dict
1687
    test = not nresult or not isinstance(nresult, dict)
1688
    _ErrorIf(test, self.ENODERPC, node,
1689
                  "unable to verify node: no data returned")
1690
    if test:
1691
      return False
1692

    
1693
    # compares ganeti version
1694
    local_version = constants.PROTOCOL_VERSION
1695
    remote_version = nresult.get("version", None)
1696
    test = not (remote_version and
1697
                isinstance(remote_version, (list, tuple)) and
1698
                len(remote_version) == 2)
1699
    _ErrorIf(test, self.ENODERPC, node,
1700
             "connection to node returned invalid data")
1701
    if test:
1702
      return False
1703

    
1704
    test = local_version != remote_version[0]
1705
    _ErrorIf(test, self.ENODEVERSION, node,
1706
             "incompatible protocol versions: master %s,"
1707
             " node %s", local_version, remote_version[0])
1708
    if test:
1709
      return False
1710

    
1711
    # node seems compatible, we can actually try to look into its results
1712

    
1713
    # full package version
1714
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1715
                  self.ENODEVERSION, node,
1716
                  "software version mismatch: master %s, node %s",
1717
                  constants.RELEASE_VERSION, remote_version[1],
1718
                  code=self.ETYPE_WARNING)
1719

    
1720
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1721
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1722
      for hv_name, hv_result in hyp_result.iteritems():
1723
        test = hv_result is not None
1724
        _ErrorIf(test, self.ENODEHV, node,
1725
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1726

    
1727
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1728
    if ninfo.vm_capable and isinstance(hvp_result, list):
1729
      for item, hv_name, hv_result in hvp_result:
1730
        _ErrorIf(True, self.ENODEHV, node,
1731
                 "hypervisor %s parameter verify failure (source %s): %s",
1732
                 hv_name, item, hv_result)
1733

    
1734
    test = nresult.get(constants.NV_NODESETUP,
1735
                       ["Missing NODESETUP results"])
1736
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1737
             "; ".join(test))
1738

    
1739
    return True
1740

    
1741
  def _VerifyNodeTime(self, ninfo, nresult,
1742
                      nvinfo_starttime, nvinfo_endtime):
1743
    """Check the node time.
1744

1745
    @type ninfo: L{objects.Node}
1746
    @param ninfo: the node to check
1747
    @param nresult: the remote results for the node
1748
    @param nvinfo_starttime: the start time of the RPC call
1749
    @param nvinfo_endtime: the end time of the RPC call
1750

1751
    """
1752
    node = ninfo.name
1753
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1754

    
1755
    ntime = nresult.get(constants.NV_TIME, None)
1756
    try:
1757
      ntime_merged = utils.MergeTime(ntime)
1758
    except (ValueError, TypeError):
1759
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1760
      return
1761

    
1762
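    # The node's clock is accepted if it falls inside the window
    # [start - NODE_MAX_CLOCK_SKEW, end + NODE_MAX_CLOCK_SKEW] around the
    # verify RPC; anything outside that window is reported as skew.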
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1763
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1764
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1765
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1766
    else:
1767
      ntime_diff = None
1768

    
1769
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1770
             "Node time diverges by at least %s from master node time",
1771
             ntime_diff)
1772

    
1773
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1774
    """Check the node LVM results.
1775

1776
    @type ninfo: L{objects.Node}
1777
    @param ninfo: the node to check
1778
    @param nresult: the remote results for the node
1779
    @param vg_name: the configured VG name
1780

1781
    """
1782
    if vg_name is None:
1783
      return
1784

    
1785
    node = ninfo.name
1786
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1787

    
1788
    # checks vg existence and size > 20G
1789
    vglist = nresult.get(constants.NV_VGLIST, None)
1790
    test = not vglist
1791
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1792
    if not test:
1793
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1794
                                            constants.MIN_VG_SIZE)
1795
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1796

    
1797
    # check pv names
1798
    pvlist = nresult.get(constants.NV_PVLIST, None)
1799
    test = pvlist is None
1800
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1801
    if not test:
1802
      # check that ':' is not present in PV names, since it's a
1803
      # special character for lvcreate (denotes the range of PEs to
1804
      # use on the PV)
1805
      for _, pvname, owner_vg in pvlist:
1806
        test = ":" in pvname
1807
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1808
                 " '%s' of VG '%s'", pvname, owner_vg)
1809

    
1810
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1811
    """Check the node bridges.
1812

1813
    @type ninfo: L{objects.Node}
1814
    @param ninfo: the node to check
1815
    @param nresult: the remote results for the node
1816
    @param bridges: the expected list of bridges
1817

1818
    """
1819
    if not bridges:
1820
      return
1821

    
1822
    node = ninfo.name
1823
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1824

    
1825
    missing = nresult.get(constants.NV_BRIDGES, None)
1826
    test = not isinstance(missing, list)
1827
    _ErrorIf(test, self.ENODENET, node,
1828
             "did not return valid bridge information")
1829
    if not test:
1830
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1831
               utils.CommaJoin(sorted(missing)))
1832

    
1833
  def _VerifyNodeNetwork(self, ninfo, nresult):
1834
    """Check the node network connectivity results.
1835

1836
    @type ninfo: L{objects.Node}
1837
    @param ninfo: the node to check
1838
    @param nresult: the remote results for the node
1839

1840
    """
1841
    node = ninfo.name
1842
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1843

    
1844
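    # The verify RPC reports SSH reachability of peer nodes under NV_NODELIST,
    # TCP connectivity under NV_NODENETTEST and master IP reachability under
    # NV_MASTERIP; each block is checked in turn below.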
    test = constants.NV_NODELIST not in nresult
1845
    _ErrorIf(test, self.ENODESSH, node,
1846
             "node hasn't returned node ssh connectivity data")
1847
    if not test:
1848
      if nresult[constants.NV_NODELIST]:
1849
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1850
          _ErrorIf(True, self.ENODESSH, node,
1851
                   "ssh communication with node '%s': %s", a_node, a_msg)
1852

    
1853
    test = constants.NV_NODENETTEST not in nresult
1854
    _ErrorIf(test, self.ENODENET, node,
1855
             "node hasn't returned node tcp connectivity data")
1856
    if not test:
1857
      if nresult[constants.NV_NODENETTEST]:
1858
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1859
        for anode in nlist:
1860
          _ErrorIf(True, self.ENODENET, node,
1861
                   "tcp communication with node '%s': %s",
1862
                   anode, nresult[constants.NV_NODENETTEST][anode])
1863

    
1864
    test = constants.NV_MASTERIP not in nresult
1865
    _ErrorIf(test, self.ENODENET, node,
1866
             "node hasn't returned node master IP reachability data")
1867
    if not test:
1868
      if not nresult[constants.NV_MASTERIP]:
1869
        if node == self.master_node:
1870
          msg = "the master node cannot reach the master IP (not configured?)"
1871
        else:
1872
          msg = "cannot reach the master IP"
1873
        _ErrorIf(True, self.ENODENET, node, msg)
1874

    
1875
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1876
                      diskstatus):
1877
    """Verify an instance.
1878

1879
    This function checks to see if the required block devices are
1880
    available on the instance's node.
1881

1882
    """
1883
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1884
    node_current = instanceconfig.primary_node
1885

    
1886
    node_vol_should = {}
1887
    instanceconfig.MapLVsByNode(node_vol_should)
1888

    
1889
    for node in node_vol_should:
1890
      n_img = node_image[node]
1891
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1892
        # ignore missing volumes on offline or broken nodes
1893
        continue
1894
      for volume in node_vol_should[node]:
1895
        test = volume not in n_img.volumes
1896
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1897
                 "volume %s missing on node %s", volume, node)
1898

    
1899
    if instanceconfig.admin_up:
1900
      pri_img = node_image[node_current]
1901
      test = instance not in pri_img.instances and not pri_img.offline
1902
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1903
               "instance not running on its primary node %s",
1904
               node_current)
1905

    
1906
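    # Flatten the per-node disk status into (node, success, status, disk_idx)
    # tuples so every disk of the instance can be checked individually.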
    diskdata = [(nname, success, status, idx)
1907
                for (nname, disks) in diskstatus.items()
1908
                for idx, (success, status) in enumerate(disks)]
1909

    
1910
    for nname, success, bdev_status, idx in diskdata:
1911
      # the 'ghost node' construction in Exec() ensures that we have a
1912
      # node here
1913
      snode = node_image[nname]
1914
      bad_snode = snode.ghost or snode.offline
1915
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1916
               self.EINSTANCEFAULTYDISK, instance,
1917
               "couldn't retrieve status for disk/%s on %s: %s",
1918
               idx, nname, bdev_status)
1919
      _ErrorIf((instanceconfig.admin_up and success and
1920
                bdev_status.ldisk_status == constants.LDS_FAULTY),
1921
               self.EINSTANCEFAULTYDISK, instance,
1922
               "disk/%s on %s is faulty", idx, nname)
1923

    
1924
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1925
    """Verify if there are any unknown volumes in the cluster.
1926

1927
    The .os, .swap and backup volumes are ignored. All other volumes are
1928
    reported as unknown.
1929

1930
    @type reserved: L{ganeti.utils.FieldSet}
1931
    @param reserved: a FieldSet of reserved volume names
1932

1933
    """
1934
    for node, n_img in node_image.items():
1935
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1936
        # skip non-healthy nodes
1937
        continue
1938
      for volume in n_img.volumes:
1939
        test = ((node not in node_vol_should or
1940
                volume not in node_vol_should[node]) and
1941
                not reserved.Matches(volume))
1942
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1943
                      "volume %s is unknown", volume)
1944

    
1945
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1946
    """Verify N+1 Memory Resilience.
1947

1948
    Check that if one single node dies we can still start all the
1949
    instances it was primary for.
1950

1951
    """
1952
    cluster_info = self.cfg.GetClusterInfo()
1953
    for node, n_img in node_image.items():
1954
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all the instances for which
      # it is secondary, should a single other node in the cluster fail.
1957
      # FIXME: not ready for failover to an arbitrary node
1958
      # FIXME: does not support file-backed instances
1959
      # WARNING: we currently take into account down instances as well
1960
      # as up ones, considering that even if they're down someone
1961
      # might want to start them even in the event of a node failure.
1962
      if n_img.offline:
1963
        # we're skipping offline nodes from the N+1 warning, since
1964
        # most likely we don't have good memory information from them;
1965
        # we already list instances living on such nodes, and that's
1966
        # enough warning
1967
        continue
1968
      for prinode, instances in n_img.sbp.items():
1969
        needed_mem = 0
1970
        for instance in instances:
1971
          bep = cluster_info.FillBE(instance_cfg[instance])
1972
          if bep[constants.BE_AUTO_BALANCE]:
1973
            needed_mem += bep[constants.BE_MEMORY]
1974
        test = n_img.mfree < needed_mem
1975
        self._ErrorIf(test, self.ENODEN1, node,
1976
                      "not enough memory to accomodate instance failovers"
1977
                      " should node %s fail (%dMiB needed, %dMiB available)",
1978
                      prinode, needed_mem, n_img.mfree)
1979

    
1980
  @classmethod
1981
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1982
                   (files_all, files_all_opt, files_mc, files_vm)):
1983
    """Verifies file checksums collected from all nodes.
1984

1985
    @param errorif: Callback for reporting errors
1986
    @param nodeinfo: List of L{objects.Node} objects
1987
    @param master_node: Name of master node
1988
    @param all_nvinfo: RPC results
1989

1990
    """
1991
    node_names = frozenset(node.name for node in nodeinfo)
1992

    
1993
    assert master_node in node_names
1994
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1995
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1996
           "Found file listed in more than one file list"
1997

    
1998
    # Define functions determining which nodes to consider for a file
1999
    file2nodefn = dict([(filename, fn)
2000
      for (files, fn) in [(files_all, None),
2001
                          (files_all_opt, None),
2002
                          (files_mc, lambda node: (node.master_candidate or
2003
                                                   node.name == master_node)),
2004
                          (files_vm, lambda node: node.vm_capable)]
2005
      for filename in files])
2006

    
2007
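    # fileinfo maps each filename to a dict of checksum -> set of nodes that
    # reported that checksum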
    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
2008

    
2009
    for node in nodeinfo:
2010
      nresult = all_nvinfo[node.name]
2011

    
2012
      if nresult.fail_msg or not nresult.payload:
2013
        node_files = None
2014
      else:
2015
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2016

    
2017
      test = not (node_files and isinstance(node_files, dict))
2018
      errorif(test, cls.ENODEFILECHECK, node.name,
2019
              "Node did not return file checksum data")
2020
      if test:
2021
        continue
2022

    
2023
      for (filename, checksum) in node_files.items():
2024
        # Check if the file should be considered for a node
2025
        fn = file2nodefn[filename]
2026
        if fn is None or fn(node):
2027
          fileinfo[filename].setdefault(checksum, set()).add(node.name)
2028

    
2029
    for (filename, checksums) in fileinfo.items():
2030
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2031

    
2032
      # Nodes having the file
2033
      with_file = frozenset(node_name
2034
                            for nodes in fileinfo[filename].values()
2035
                            for node_name in nodes)
2036

    
2037
      # Nodes missing file
2038
      missing_file = node_names - with_file
2039

    
2040
      if filename in files_all_opt:
2041
        # All or no nodes
2042
        errorif(missing_file and missing_file != node_names,
2043
                cls.ECLUSTERFILECHECK, None,
2044
                "File %s is optional, but it must exist on all or no"
2045
                " nodes (not found on %s)",
2046
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2047
      else:
2048
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2049
                "File %s is missing from node(s) %s", filename,
2050
                utils.CommaJoin(utils.NiceSort(missing_file)))
2051

    
2052
      # See if there are multiple versions of the file
2053
      test = len(checksums) > 1
2054
      if test:
2055
        variants = ["variant %s on %s" %
2056
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2057
                    for (idx, (checksum, nodes)) in
2058
                      enumerate(sorted(checksums.items()))]
2059
      else:
2060
        variants = []
2061

    
2062
      errorif(test, cls.ECLUSTERFILECHECK, None,
2063
              "File %s found with %s different checksums (%s)",
2064
              filename, len(checksums), "; ".join(variants))
2065

    
2066
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2067
                      drbd_map):
2068
    """Verifies and the node DRBD status.
2069

2070
    @type ninfo: L{objects.Node}
2071
    @param ninfo: the node to check
2072
    @param nresult: the remote results for the node
2073
    @param instanceinfo: the dict of instances
2074
    @param drbd_helper: the configured DRBD usermode helper
2075
    @param drbd_map: the DRBD map as returned by
2076
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2077

2078
    """
2079
    node = ninfo.name
2080
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2081

    
2082
    if drbd_helper:
2083
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2084
      test = (helper_result is None)
2085
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
2086
               "no drbd usermode helper returned")
2087
      if helper_result:
2088
        status, payload = helper_result
2089
        test = not status
2090
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2091
                 "drbd usermode helper check unsuccessful: %s", payload)
2092
        test = status and (payload != drbd_helper)
2093
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2094
                 "wrong drbd usermode helper: %s", payload)
2095

    
2096
    # compute the DRBD minors
2097
    node_drbd = {}
2098
    for minor, instance in drbd_map[node].items():
2099
      test = instance not in instanceinfo
2100
      _ErrorIf(test, self.ECLUSTERCFG, None,
2101
               "ghost instance '%s' in temporary DRBD map", instance)
2102
        # ghost instance should not be running, but otherwise we
2103
        # don't give double warnings (both ghost instance and
2104
        # unallocated minor in use)
2105
      if test:
2106
        node_drbd[minor] = (instance, False)
2107
      else:
2108
        instance = instanceinfo[instance]
2109
        node_drbd[minor] = (instance.name, instance.admin_up)
2110

    
2111
    # and now check them
2112
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2113
    test = not isinstance(used_minors, (tuple, list))
2114
    _ErrorIf(test, self.ENODEDRBD, node,
2115
             "cannot parse drbd status file: %s", str(used_minors))
2116
    if test:
2117
      # we cannot check drbd status
2118
      return
2119

    
2120
    for minor, (iname, must_exist) in node_drbd.items():
2121
      test = minor not in used_minors and must_exist
2122
      _ErrorIf(test, self.ENODEDRBD, node,
2123
               "drbd minor %d of instance %s is not active", minor, iname)
2124
    for minor in used_minors:
2125
      test = minor not in node_drbd
2126
      _ErrorIf(test, self.ENODEDRBD, node,
2127
               "unallocated drbd minor %d is in use", minor)
2128

    
2129
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2130
    """Builds the node OS structures.
2131

2132
    @type ninfo: L{objects.Node}
2133
    @param ninfo: the node to check
2134
    @param nresult: the remote results for the node
2135
    @param nimg: the node image object
2136

2137
    """
2138
    node = ninfo.name
2139
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2140

    
2141
    remote_os = nresult.get(constants.NV_OSLIST, None)
2142
    test = (not isinstance(remote_os, list) or
2143
            not compat.all(isinstance(v, list) and len(v) == 7
2144
                           for v in remote_os))
2145

    
2146
    _ErrorIf(test, self.ENODEOS, node,
2147
             "node hasn't returned valid OS data")
2148

    
2149
    nimg.os_fail = test
2150

    
2151
    if test:
2152
      return
2153

    
2154
    os_dict = {}
2155

    
2156
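    # Each OS entry reported by the node is a 7-element list: name, path,
    # status, diagnose message, variants, parameters and API versions.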
    for (name, os_path, status, diagnose,
2157
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2158

    
2159
      if name not in os_dict:
2160
        os_dict[name] = []
2161

    
2162
      # parameters is a list of lists instead of list of tuples due to
2163
      # JSON lacking a real tuple type, fix it:
2164
      parameters = [tuple(v) for v in parameters]
2165
      os_dict[name].append((os_path, status, diagnose,
2166
                            set(variants), set(parameters), set(api_ver)))
2167

    
2168
    nimg.oslist = os_dict
2169

    
2170
  def _VerifyNodeOS(self, ninfo, nimg, base):
2171
    """Verifies the node OS list.
2172

2173
    @type ninfo: L{objects.Node}
2174
    @param ninfo: the node to check
2175
    @param nimg: the node image object
2176
    @param base: the 'template' node we match against (e.g. from the master)
2177

2178
    """
2179
    node = ninfo.name
2180
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2181

    
2182
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2183

    
2184
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2185
    for os_name, os_data in nimg.oslist.items():
2186
      assert os_data, "Empty OS status for OS %s?!" % os_name
2187
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2188
      _ErrorIf(not f_status, self.ENODEOS, node,
2189
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2190
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2191
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2192
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2193
      # this will be caught in the backend too
2194
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
2195
               and not f_var, self.ENODEOS, node,
2196
               "OS %s with API at least %d does not declare any variant",
2197
               os_name, constants.OS_API_V15)
2198
      # comparisons with the 'base' image
2199
      test = os_name not in base.oslist
2200
      _ErrorIf(test, self.ENODEOS, node,
2201
               "Extra OS %s not present on reference node (%s)",
2202
               os_name, base.name)
2203
      if test:
2204
        continue
2205
      assert base.oslist[os_name], "Base node has empty OS status?"
2206
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2207
      if not b_status:
2208
        # base OS is invalid, skipping
2209
        continue
2210
      for kind, a, b in [("API version", f_api, b_api),
2211
                         ("variants list", f_var, b_var),
2212
                         ("parameters", beautify_params(f_param),
2213
                          beautify_params(b_param))]:
2214
        _ErrorIf(a != b, self.ENODEOS, node,
2215
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2216
                 kind, os_name, base.name,
2217
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2218

    
2219
    # check any missing OSes
2220
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2221
    _ErrorIf(missing, self.ENODEOS, node,
2222
             "OSes present on reference node %s but missing on this node: %s",
2223
             base.name, utils.CommaJoin(missing))
2224

    
2225
  def _VerifyOob(self, ninfo, nresult):
2226
    """Verifies out of band functionality of a node.
2227

2228
    @type ninfo: L{objects.Node}
2229
    @param ninfo: the node to check
2230
    @param nresult: the remote results for the node
2231

2232
    """
2233
    node = ninfo.name
2234
    # We just have to verify the paths on master and/or master candidates
2235
    # as the oob helper is invoked on the master
2236
    if ((ninfo.master_candidate or ninfo.master_capable) and
2237
        constants.NV_OOB_PATHS in nresult):
2238
      for path_result in nresult[constants.NV_OOB_PATHS]:
2239
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2240

    
2241
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2242
    """Verifies and updates the node volume data.
2243

2244
    This function will update a L{NodeImage}'s internal structures
2245
    with data from the remote call.
2246

2247
    @type ninfo: L{objects.Node}
2248
    @param ninfo: the node to check
2249
    @param nresult: the remote results for the node
2250
    @param nimg: the node image object
2251
    @param vg_name: the configured VG name
2252

2253
    """
2254
    node = ninfo.name
2255
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2256

    
2257
    nimg.lvm_fail = True
2258
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2259
    if vg_name is None:
2260
      pass
2261
    elif isinstance(lvdata, basestring):
2262
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2263
               utils.SafeEncode(lvdata))
2264
    elif not isinstance(lvdata, dict):
2265
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2266
    else:
2267
      nimg.volumes = lvdata
2268
      nimg.lvm_fail = False
2269

    
2270
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2271
    """Verifies and updates the node instance list.
2272

2273
    If the listing was successful, then updates this node's instance
2274
    list. Otherwise, it marks the RPC call as failed for the instance
2275
    list key.
2276

2277
    @type ninfo: L{objects.Node}
2278
    @param ninfo: the node to check
2279
    @param nresult: the remote results for the node
2280
    @param nimg: the node image object
2281

2282
    """
2283
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2284
    test = not isinstance(idata, list)
2285
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2286
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2287
    if test:
2288
      nimg.hyp_fail = True
2289
    else:
2290
      nimg.instances = idata
2291

    
2292
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2293
    """Verifies and computes a node information map
2294

2295
    @type ninfo: L{objects.Node}
2296
    @param ninfo: the node to check
2297
    @param nresult: the remote results for the node
2298
    @param nimg: the node image object
2299
    @param vg_name: the configured VG name
2300

2301
    """
2302
    node = ninfo.name
2303
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2304

    
2305
    # try to read free memory (from the hypervisor)
2306
    hv_info = nresult.get(constants.NV_HVINFO, None)
2307
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2308
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2309
    if not test:
2310
      try:
2311
        nimg.mfree = int(hv_info["memory_free"])
2312
      except (ValueError, TypeError):
2313
        _ErrorIf(True, self.ENODERPC, node,
2314
                 "node returned invalid nodeinfo, check hypervisor")
2315

    
2316
    # FIXME: devise a free space model for file based instances as well
2317
    if vg_name is not None:
2318
      test = (constants.NV_VGLIST not in nresult or
2319
              vg_name not in nresult[constants.NV_VGLIST])
2320
      _ErrorIf(test, self.ENODELVM, node,
2321
               "node didn't return data for the volume group '%s'"
2322
               " - it is either missing or broken", vg_name)
2323
      if not test:
2324
        try:
2325
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2326
        except (ValueError, TypeError):
2327
          _ErrorIf(True, self.ENODERPC, node,
2328
                   "node returned invalid LVM info, check LVM status")
2329

    
2330
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2331
    """Gets per-disk status information for all instances.
2332

2333
    @type nodelist: list of strings
2334
    @param nodelist: Node names
2335
    @type node_image: dict of (name, L{objects.Node})
2336
    @param node_image: Node objects
2337
    @type instanceinfo: dict of (name, L{objects.Instance})
2338
    @param instanceinfo: Instance objects
2339
    @rtype: {instance: {node: [(success, payload)]}}
2340
    @return: a dictionary of per-instance dictionaries with nodes as
2341
        keys and disk information as values; the disk information is a
2342
        list of tuples (success, payload)
2343

2344
    """
2345
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2346

    
2347
    node_disks = {}
2348
    node_disks_devonly = {}
2349
    diskless_instances = set()
2350
    diskless = constants.DT_DISKLESS
2351

    
2352
    for nname in nodelist:
2353
      node_instances = list(itertools.chain(node_image[nname].pinst,
2354
                                            node_image[nname].sinst))
2355
      diskless_instances.update(inst for inst in node_instances
2356
                                if instanceinfo[inst].disk_template == diskless)
2357
      disks = [(inst, disk)
2358
               for inst in node_instances
2359
               for disk in instanceinfo[inst].disks]
2360

    
2361
      if not disks:
2362
        # No need to collect data
2363
        continue
2364

    
2365
      node_disks[nname] = disks
2366

    
2367
      # Creating copies as SetDiskID below will modify the objects and that can
2368
      # lead to incorrect data returned from nodes
2369
      devonly = [dev.Copy() for (_, dev) in disks]
2370

    
2371
      for dev in devonly:
2372
        self.cfg.SetDiskID(dev, nname)
2373

    
2374
      node_disks_devonly[nname] = devonly
2375

    
2376
    assert len(node_disks) == len(node_disks_devonly)
2377

    
2378
    # Collect data from all nodes with disks
2379
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2380
                                                          node_disks_devonly)
2381

    
2382
    assert len(result) == len(node_disks)
2383

    
2384
    instdisk = {}
2385

    
2386
    for (nname, nres) in result.items():
2387
      disks = node_disks[nname]
2388

    
2389
      if nres.offline:
2390
        # No data from this node
2391
        data = len(disks) * [(False, "node offline")]
2392
      else:
2393
        msg = nres.fail_msg
2394
        _ErrorIf(msg, self.ENODERPC, nname,
2395
                 "while getting disk information: %s", msg)
2396
        if msg:
2397
          # No data from this node
2398
          data = len(disks) * [(False, msg)]
2399
        else:
2400
          data = []
2401
          for idx, i in enumerate(nres.payload):
2402
            if isinstance(i, (tuple, list)) and len(i) == 2:
2403
              data.append(i)
2404
            else:
2405
              logging.warning("Invalid result from node %s, entry %d: %s",
2406
                              nname, idx, i)
2407
              data.append((False, "Invalid result from the remote node"))
2408

    
2409
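      # "data" was built in the same order as "disks", so zip() pairs every
      # status with the (instance, disk) tuple it belongs to.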
      for ((inst, _), status) in zip(disks, data):
2410
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2411

    
2412
    # Add empty entries for diskless instances.
2413
    for inst in diskless_instances:
2414
      assert inst not in instdisk
2415
      instdisk[inst] = {}
2416

    
2417
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2418
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2419
                      compat.all(isinstance(s, (tuple, list)) and
2420
                                 len(s) == 2 for s in statuses)
2421
                      for inst, nnames in instdisk.items()
2422
                      for nname, statuses in nnames.items())
2423
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2424

    
2425
    return instdisk
2426

    
2427
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; when they fail,
    their output is logged in the verify output and the verification fails.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
2450
    """Verify integrity of the node group, performing various test on nodes.
2451

2452
    """
2453
    # This method has too many local variables. pylint: disable-msg=R0914
2454

    
2455
    if not self.my_node_names:
2456
      # empty node group
2457
      feedback_fn("* Empty node group, skipping verification")
2458
      return True
2459

    
2460
    self.bad = False
2461
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2462
    verbose = self.op.verbose
2463
    self._feedback_fn = feedback_fn
2464

    
2465
    vg_name = self.cfg.GetVGName()
2466
    drbd_helper = self.cfg.GetDRBDHelper()
2467
    cluster = self.cfg.GetClusterInfo()
2468
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2469
    hypervisors = cluster.enabled_hypervisors
2470
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2471

    
2472
    i_non_redundant = [] # Non redundant instances
2473
    i_non_a_balanced = [] # Non auto-balanced instances
2474
    n_offline = 0 # Count of offline nodes
2475
    n_drained = 0 # Count of nodes being drained
2476
    node_vol_should = {}
2477

    
2478
    # FIXME: verify OS list
2479

    
2480
    # File verification
2481
    filemap = _ComputeAncillaryFiles(cluster, False)
2482

    
2483
    # do local checksums
2484
    master_node = self.master_node = self.cfg.GetMasterNode()
2485
    master_ip = self.cfg.GetMasterIP()
2486

    
2487
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2488

    
2489
    # We will make nodes contact all nodes in their group, and one node from
2490
    # every other group.
2491
    # TODO: should it be a *random* node, different every time?
2492
    online_nodes = [node.name for node in node_data_list if not node.offline]
2493
    other_group_nodes = {}
2494

    
2495
    for name in sorted(self.all_node_info):
2496
      node = self.all_node_info[name]
2497
      if (node.group not in other_group_nodes
2498
          and node.group != self.group_uuid
2499
          and not node.offline):
2500
        other_group_nodes[node.group] = node.name
2501

    
2502
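    # This dictionary is the payload of the node verify RPC: every NV_* key
    # selects a check the nodes should run and carries its input data.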
    node_verify_param = {
2503
      constants.NV_FILELIST:
2504
        utils.UniqueSequence(filename
2505
                             for files in filemap
2506
                             for filename in files),
2507
      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2508
      constants.NV_HYPERVISOR: hypervisors,
2509
      constants.NV_HVPARAMS:
2510
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2511
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2512
                                 for node in node_data_list
2513
                                 if not node.offline],
2514
      constants.NV_INSTANCELIST: hypervisors,
2515
      constants.NV_VERSION: None,
2516
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2517
      constants.NV_NODESETUP: None,
2518
      constants.NV_TIME: None,
2519
      constants.NV_MASTERIP: (master_node, master_ip),
2520
      constants.NV_OSLIST: None,
2521
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2522
      }
2523

    
2524
    if vg_name is not None:
2525
      node_verify_param[constants.NV_VGLIST] = None
2526
      node_verify_param[constants.NV_LVLIST] = vg_name
2527
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2528
      node_verify_param[constants.NV_DRBDLIST] = None
2529

    
2530
    if drbd_helper:
2531
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2532

    
2533
    # bridge checks
2534
    # FIXME: this needs to be changed per node-group, not cluster-wide
2535
    bridges = set()
2536
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2537
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2538
      bridges.add(default_nicpp[constants.NIC_LINK])
2539
    for instance in self.my_inst_info.values():
2540
      for nic in instance.nics:
2541
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2542
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2543
          bridges.add(full_nic[constants.NIC_LINK])
2544

    
2545
    if bridges:
2546
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2547

    
2548
    # Build our expected cluster state
2549
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2550
                                                 name=node.name,
2551
                                                 vm_capable=node.vm_capable))
2552
                      for node in node_data_list)
2553

    
2554
    # Gather OOB paths
2555
    oob_paths = []
2556
    for node in self.all_node_info.values():
2557
      path = _SupportsOob(self.cfg, node)
2558
      if path and path not in oob_paths:
2559
        oob_paths.append(path)
2560

    
2561
    if oob_paths:
2562
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2563

    
2564
    for instance in self.my_inst_names:
2565
      inst_config = self.my_inst_info[instance]
2566

    
2567
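      # Instance nodes that are outside this group get a placeholder image;
      # nodes missing from the configuration entirely are flagged as ghosts.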
      for nname in inst_config.all_nodes:
2568
        if nname not in node_image:
2569
          gnode = self.NodeImage(name=nname)
2570
          gnode.ghost = (nname not in self.all_node_info)
2571
          node_image[nname] = gnode
2572

    
2573
      inst_config.MapLVsByNode(node_vol_should)
2574

    
2575
      pnode = inst_config.primary_node
2576
      node_image[pnode].pinst.append(instance)
2577

    
2578
      for snode in inst_config.secondary_nodes:
2579
        nimg = node_image[snode]
2580
        nimg.sinst.append(instance)
2581
        if pnode not in nimg.sbp:
2582
          nimg.sbp[pnode] = []
2583
        nimg.sbp[pnode].append(instance)
2584

    
2585
    # At this point, we have the in-memory data structures complete,
2586
    # except for the runtime information, which we'll gather next
2587

    
2588
    # Due to the way our RPC system works, exact response times cannot be
2589
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2590
    # time before and after executing the request, we can at least have a time
2591
    # window.
2592
    nvinfo_starttime = time.time()
2593
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2594
                                           node_verify_param,
2595
                                           self.cfg.GetClusterName())
2596
    nvinfo_endtime = time.time()
2597

    
2598
    if self.extra_lv_nodes and vg_name is not None:
2599
      extra_lv_nvinfo = \
2600
          self.rpc.call_node_verify(self.extra_lv_nodes,
2601
                                    {constants.NV_LVLIST: vg_name},
2602
                                    self.cfg.GetClusterName())
2603
    else:
2604
      extra_lv_nvinfo = {}
2605

    
2606
    all_drbd_map = self.cfg.ComputeDRBDMap()
2607

    
2608
    feedback_fn("* Gathering disk information (%s nodes)" %
2609
                len(self.my_node_names))
2610
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2611
                                     self.my_inst_info)
2612

    
2613
    feedback_fn("* Verifying configuration file consistency")
2614

    
2615
    # If not all nodes are being checked, we need to make sure the master node
2616
    # and a non-checked vm_capable node are in the list.
2617
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2618
    if absent_nodes:
2619
      vf_nvinfo = all_nvinfo.copy()
2620
      vf_node_info = list(self.my_node_info.values())
2621
      additional_nodes = []
2622
      if master_node not in self.my_node_info:
2623
        additional_nodes.append(master_node)
2624
        vf_node_info.append(self.all_node_info[master_node])
2625
      # Add the first vm_capable node we find which is not included
2626
      for node in absent_nodes:
2627
        nodeinfo = self.all_node_info[node]
2628
        if nodeinfo.vm_capable and not nodeinfo.offline:
2629
          additional_nodes.append(node)
2630
          vf_node_info.append(self.all_node_info[node])
2631
          break
2632
      key = constants.NV_FILELIST
2633
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2634
                                                 {key: node_verify_param[key]},
2635
                                                 self.cfg.GetClusterName()))
2636
    else:
2637
      vf_nvinfo = all_nvinfo
2638
      vf_node_info = self.my_node_info.values()
2639

    
2640
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2641

    
2642
    feedback_fn("* Verifying node status")
2643

    
2644
    refos_img = None
2645

    
2646
    for node_i in node_data_list:
2647
      node = node_i.name
2648
      nimg = node_image[node]
2649

    
2650
      if node_i.offline:
2651
        if verbose:
2652
          feedback_fn("* Skipping offline node %s" % (node,))
2653
        n_offline += 1
2654
        continue
2655

    
2656
      if node == master_node:
2657
        ntype = "master"
2658
      elif node_i.master_candidate:
2659
        ntype = "master candidate"
2660
      elif node_i.drained:
2661
        ntype = "drained"
2662
        n_drained += 1
2663
      else:
2664
        ntype = "regular"
2665
      if verbose:
2666
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2667

    
2668
      msg = all_nvinfo[node].fail_msg
2669
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2670
      if msg:
2671
        nimg.rpc_fail = True
2672
        continue
2673

    
2674
      nresult = all_nvinfo[node].payload
2675

    
2676
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2677
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2678
      self._VerifyNodeNetwork(node_i, nresult)
2679
      self._VerifyOob(node_i, nresult)
2680

    
2681
      if nimg.vm_capable:
2682
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2683
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2684
                             all_drbd_map)
2685

    
2686
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2687
        self._UpdateNodeInstances(node_i, nresult, nimg)
2688
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2689
        self._UpdateNodeOS(node_i, nresult, nimg)
2690

    
2691
        if not nimg.os_fail:
2692
          if refos_img is None:
2693
            refos_img = nimg
2694
          self._VerifyNodeOS(node_i, nimg, refos_img)
2695
        self._VerifyNodeBridges(node_i, nresult, bridges)
2696

    
2697
        # Check whether all running instances are primary for the node. (This
2698
        # can no longer be done from _VerifyInstance below, since some of the
2699
        # wrong instances could be from other node groups.)
2700
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2701

    
2702
        for inst in non_primary_inst:
2703
          test = inst in self.all_inst_info
2704
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2705
                   "instance should not run on node %s", node_i.name)
2706
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2707
                   "node is running unknown instance %s", inst)
2708

    
2709
    for node, result in extra_lv_nvinfo.items():
2710
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2711
                              node_image[node], vg_name)
2712

    
2713
    feedback_fn("* Verifying instance status")
2714
    for instance in self.my_inst_names:
2715
      if verbose:
2716
        feedback_fn("* Verifying instance %s" % instance)
2717
      inst_config = self.my_inst_info[instance]
2718
      self._VerifyInstance(instance, inst_config, node_image,
2719
                           instdisk[instance])
2720
      inst_nodes_offline = []
2721

    
2722
      pnode = inst_config.primary_node
2723
      pnode_img = node_image[pnode]
2724
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2725
               self.ENODERPC, pnode, "instance %s, connection to"
2726
               " primary node failed", instance)
2727

    
2728
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2729
               self.EINSTANCEBADNODE, instance,
2730
               "instance is marked as running and lives on offline node %s",
2731
               inst_config.primary_node)
2732

    
2733
      # If the instance is non-redundant we cannot survive losing its primary
2734
      # node, so we are not N+1 compliant. On the other hand we have no disk
2735
      # templates with more than one secondary so that situation is not well
2736
      # supported either.
2737
      # FIXME: does not support file-backed instances
2738
      if not inst_config.secondary_nodes:
2739
        i_non_redundant.append(instance)
2740

    
2741
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2742
               instance, "instance has multiple secondary nodes: %s",
2743
               utils.CommaJoin(inst_config.secondary_nodes),
2744
               code=self.ETYPE_WARNING)
2745

    
2746
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2747
        pnode = inst_config.primary_node
2748
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2749
        instance_groups = {}
2750

    
2751
        for node in instance_nodes:
2752
          instance_groups.setdefault(self.all_node_info[node].group,
2753
                                     []).append(node)
2754

    
2755
        pretty_list = [
2756
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2757
          # Sort so that we always list the primary node first.
2758
          for group, nodes in sorted(instance_groups.items(),
2759
                                     key=lambda (_, nodes): pnode in nodes,
2760
                                     reverse=True)]
2761

    
2762
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2763
                      instance, "instance has primary and secondary nodes in"
2764
                      " different groups: %s", utils.CommaJoin(pretty_list),
2765
                      code=self.ETYPE_WARNING)
2766

    
2767
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2768
        i_non_a_balanced.append(instance)
2769

    
2770
      for snode in inst_config.secondary_nodes:
2771
        s_img = node_image[snode]
2772
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2773
                 "instance %s, connection to secondary node failed", instance)
2774

    
2775
        if s_img.offline:
2776
          inst_nodes_offline.append(snode)
2777

    
2778
      # warn that the instance lives on offline nodes
2779
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2780
               "instance has offline secondary node(s) %s",
2781
               utils.CommaJoin(inst_nodes_offline))
2782
      # ... or ghost/non-vm_capable nodes
2783
      for node in inst_config.all_nodes:
2784
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2785
                 "instance lives on ghost node %s", node)
2786
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2787
                 instance, "instance lives on non-vm_capable node %s", node)
2788

    
2789
    feedback_fn("* Verifying orphan volumes")
2790
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2791

    
2792
    # We will get spurious "unknown volume" warnings if any node of this group
2793
    # is secondary for an instance whose primary is in another group. To avoid
2794
    # them, we find these instances and add their volumes to node_vol_should.
2795
    for inst in self.all_inst_info.values():
2796
      for secondary in inst.secondary_nodes:
2797
        if (secondary in self.my_node_info
2798
            and inst.name not in self.my_inst_info):
2799
          inst.MapLVsByNode(node_vol_should)
2800
          break
2801

    
2802
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2803

    
2804
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2805
      feedback_fn("* Verifying N+1 Memory redundancy")
2806
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2807

    
2808
    feedback_fn("* Other Notes")
2809
    if i_non_redundant:
2810
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2811
                  % len(i_non_redundant))
2812

    
2813
    if i_non_a_balanced:
2814
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2815
                  % len(i_non_a_balanced))
2816

    
2817
    if n_offline:
2818
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2819

    
2820
    if n_drained:
2821
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2822

    
2823
    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = 0

    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.glm.list_owned(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
2895
  """Verifies the status of all disks in a node group.
2896

2897
  """
2898
  REQ_BGL = False
2899

    
2900
  def ExpandNames(self):
2901
    # Raises errors.OpPrereqError on its own if group can't be found
2902
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2903

    
2904
    self.share_locks = _ShareAll()
2905
    self.needed_locks = {
2906
      locking.LEVEL_INSTANCE: [],
2907
      locking.LEVEL_NODEGROUP: [],
2908
      locking.LEVEL_NODE: [],
2909
      }
2910

    
2911
  def DeclareLocks(self, level):
2912
    if level == locking.LEVEL_INSTANCE:
2913
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
2914

    
2915
      # Lock instances optimistically, needs verification once node and group
2916
      # locks have been acquired
2917
      self.needed_locks[locking.LEVEL_INSTANCE] = \
2918
        self.cfg.GetNodeGroupInstances(self.group_uuid)
2919

    
2920
    elif level == locking.LEVEL_NODEGROUP:
2921
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
2922

    
2923
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
2924
        set([self.group_uuid] +
2925
            # Lock all groups used by instances optimistically; this requires
2926
            # going via the node before it's locked, requiring verification
2927
            # later on
2928
            [group_uuid
2929
             for instance_name in
2930
               self.glm.list_owned(locking.LEVEL_INSTANCE)
2931
             for group_uuid in
2932
               self.cfg.GetInstanceNodeGroups(instance_name)])
2933

    
2934
    elif level == locking.LEVEL_NODE:
2935
      # This will only lock the nodes in the group to be verified which contain
2936
      # actual instances
2937
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
2938
      self._LockInstancesNodes()
2939

    
2940
      # Lock all nodes in group to be verified
2941
      assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
2942
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
2943
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
2944

    
2945
  def CheckPrereq(self):
2946
    owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
2947
    owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
2948
    owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))
2949

    
2950
    assert self.group_uuid in owned_groups
2951

    
2952
    # Check if locked instances are still correct
2953
    wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2954
    if owned_instances != wanted_instances:
2955
      raise errors.OpPrereqError("Instances in node group %s changed since"
2956
                                 " locks were acquired, wanted %s, have %s;"
2957
                                 " retry the operation" %
2958
                                 (self.op.group_name,
2959
                                  utils.CommaJoin(wanted_instances),
2960
                                  utils.CommaJoin(owned_instances)),
2961
                                 errors.ECODE_STATE)
2962

    
2963
    # Get instance information
2964
    self.instances = dict((name, self.cfg.GetInstanceInfo(name))
2965
                          for name in owned_instances)
2966

    
2967
    # Check if node groups for locked instances are still correct
2968
    for (instance_name, inst) in self.instances.items():
2969
      assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
2970
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
2971
      assert owned_nodes.issuperset(inst.all_nodes), \
2972
        "Instance %s's nodes changed while we kept the lock" % instance_name
2973

    
2974
      inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
2975
      if not owned_groups.issuperset(inst_groups):
2976
        raise errors.OpPrereqError("Instance %s's node groups changed since"
                                   " locks were acquired, current groups are"
                                   " '%s', owning groups '%s'; retry the"
2979
                                   " operation" %
2980
                                   (instance_name,
2981
                                    utils.CommaJoin(inst_groups),
2982
                                    utils.CommaJoin(owned_groups)),
2983
                                   errors.ECODE_STATE)
2984

    
2985
  def Exec(self, feedback_fn):
2986
    """Verify integrity of cluster disks.
2987

2988
    @rtype: tuple of three items
2989
    @return: a tuple of (dict of node-to-node_error, list of instances
2990
        which need activate-disks, dict of instance: (node, volume) for
2991
        missing volumes)
2992

2993
    """
2994
    res_nodes = {}
2995
    res_instances = set()
2996
    res_missing = {}
2997

    
2998
    nv_dict = _MapInstanceDisksToNodes([inst
2999
                                        for inst in self.instances.values()
3000
                                        if inst.admin_up])
3001

    
3002
    if nv_dict:
3003
      nodes = utils.NiceSort(set(self.glm.list_owned(locking.LEVEL_NODE)) &
3004
                             set(self.cfg.GetVmCapableNodeList()))
3005

    
3006
      node_lvs = self.rpc.call_lv_list(nodes, [])
3007

    
3008
      for (node, node_res) in node_lvs.items():
3009
        if node_res.offline:
3010
          continue
3011

    
3012
        msg = node_res.fail_msg
3013
        if msg:
3014
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3015
          res_nodes[node] = msg
3016
          continue
3017

    
3018
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3019
          inst = nv_dict.pop((node, lv_name), None)
3020
          if not (lv_online or inst is None):
3021
            res_instances.add(inst)
3022

    
3023
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3024
      # better
3025
      for key, inst in nv_dict.iteritems():
3026
        res_missing.setdefault(inst, []).append(key)
3027

    
3028
    return (res_nodes, list(res_instances), res_missing)
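    # Illustrative sketch (added; not part of the original code, names are
    # made up): assuming a single affected instance whose logical volume
    # "xenvg/disk0" was not reported by node "node2.example.com", the value
    # returned above would look roughly like
    #   ({}, [], {instance: [("node2.example.com", "xenvg/disk0")]})
    # i.e. no per-node RPC errors, no instances needing activate-disks, and
    # one instance with a missing volume.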
3029

    
3030

    
3031
class LUClusterRepairDiskSizes(NoHooksLU):
3032
  """Verifies the cluster disks sizes.
3033

3034
  """
3035
  REQ_BGL = False
3036

    
3037
  def ExpandNames(self):
3038
    if self.op.instances:
3039
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3040
      self.needed_locks = {
3041
        locking.LEVEL_NODE: [],
3042
        locking.LEVEL_INSTANCE: self.wanted_names,
3043
        }
3044
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3045
    else:
3046
      self.wanted_names = None
3047
      self.needed_locks = {
3048
        locking.LEVEL_NODE: locking.ALL_SET,
3049
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3050
        }
3051
    self.share_locks = _ShareAll()
3052

    
3053
  def DeclareLocks(self, level):
3054
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3055
      self._LockInstancesNodes(primary_only=True)
3056

    
3057
  def CheckPrereq(self):
3058
    """Check prerequisites.
3059

3060
    This only checks the optional instance list against the existing names.
3061

3062
    """
3063
    if self.wanted_names is None:
3064
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
3065

    
3066
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
3067
                             in self.wanted_names]
3068

    
3069
  def _EnsureChildSizes(self, disk):
3070
    """Ensure children of the disk have the needed disk size.
3071

3072
    This is valid mainly for DRBD8 and fixes an issue where the
3073
    children have smaller disk size.
3074

3075
    @param disk: an L{ganeti.objects.Disk} object
3076

3077
    """
3078
    if disk.dev_type == constants.LD_DRBD8:
3079
      assert disk.children, "Empty children for DRBD8?"
3080
      fchild = disk.children[0]
3081
      mismatch = fchild.size < disk.size
3082
      if mismatch:
3083
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3084
                     fchild.size, disk.size)
3085
        fchild.size = disk.size
3086

    
3087
      # and we recurse on this child only, not on the metadev
3088
      return self._EnsureChildSizes(fchild) or mismatch
3089
    else:
3090
      return False
3091

    
3092
  def Exec(self, feedback_fn):
3093
    """Verify the size of cluster disks.
3094

3095
    """
3096
    # TODO: check child disks too
3097
    # TODO: check differences in size between primary/secondary nodes
3098
    per_node_disks = {}
3099
    for instance in self.wanted_instances:
3100
      pnode = instance.primary_node
3101
      if pnode not in per_node_disks:
3102
        per_node_disks[pnode] = []
3103
      for idx, disk in enumerate(instance.disks):
3104
        per_node_disks[pnode].append((instance, idx, disk))
3105

    
3106
    changed = []
3107
    for node, dskl in per_node_disks.items():
3108
      newl = [v[2].Copy() for v in dskl]
3109
      for dsk in newl:
3110
        self.cfg.SetDiskID(dsk, node)
3111
      result = self.rpc.call_blockdev_getsize(node, newl)
3112
      if result.fail_msg:
3113
        self.LogWarning("Failure in blockdev_getsize call to node"
3114
                        " %s, ignoring", node)
3115
        continue
3116
      if len(result.payload) != len(dskl):
3117
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
3118
                        " result.payload=%s", node, len(dskl), result.payload)
3119
        self.LogWarning("Invalid result from node %s, ignoring node results",
3120
                        node)
3121
        continue
3122
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3123
        if size is None:
3124
          self.LogWarning("Disk %d of instance %s did not return size"
3125
                          " information, ignoring", idx, instance.name)
3126
          continue
3127
        if not isinstance(size, (int, long)):
3128
          self.LogWarning("Disk %d of instance %s did not return valid"
3129
                          " size information, ignoring", idx, instance.name)
3130
          continue
3131
        size = size >> 20
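        # Note (added for clarity): the sizes returned by blockdev_getsize
        # are in bytes, while disk.size is kept in MiB, so the shift by 20
        # bits converts units: e.g. 10737418240 >> 20 == 10240 (a 10 GiB
        # disk is recorded as 10240 MiB).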
3132
        if size != disk.size:
3133
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3134
                       " correcting: recorded %d, actual %d", idx,
3135
                       instance.name, disk.size, size)
3136
          disk.size = size
3137
          self.cfg.Update(instance, feedback_fn)
3138
          changed.append((instance.name, idx, size))
3139
        if self._EnsureChildSizes(disk):
3140
          self.cfg.Update(instance, feedback_fn)
3141
          changed.append((instance.name, idx, disk.size))
3142
    return changed
3143

    
3144

    
3145
class LUClusterRename(LogicalUnit):
3146
  """Rename the cluster.
3147

3148
  """
3149
  HPATH = "cluster-rename"
3150
  HTYPE = constants.HTYPE_CLUSTER
3151

    
3152
  def BuildHooksEnv(self):
3153
    """Build hooks env.
3154

3155
    """
3156
    return {
3157
      "OP_TARGET": self.cfg.GetClusterName(),
3158
      "NEW_NAME": self.op.name,
3159
      }
3160

    
3161
  def BuildHooksNodes(self):
3162
    """Build hooks nodes.
3163

3164
    """
3165
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3166

    
3167
  def CheckPrereq(self):
3168
    """Verify that the passed name is a valid one.
3169

3170
    """
3171
    hostname = netutils.GetHostname(name=self.op.name,
3172
                                    family=self.cfg.GetPrimaryIPFamily())
3173

    
3174
    new_name = hostname.name
3175
    self.ip = new_ip = hostname.ip
3176
    old_name = self.cfg.GetClusterName()
3177
    old_ip = self.cfg.GetMasterIP()
3178
    if new_name == old_name and new_ip == old_ip:
3179
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3180
                                 " cluster has changed",
3181
                                 errors.ECODE_INVAL)
3182
    if new_ip != old_ip:
3183
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3184
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3185
                                   " reachable on the network" %
3186
                                   new_ip, errors.ECODE_NOTUNIQUE)
3187

    
3188
    self.op.name = new_name
3189

    
3190
  def Exec(self, feedback_fn):
3191
    """Rename the cluster.
3192

3193
    """
3194
    clustername = self.op.name
3195
    ip = self.ip
3196

    
3197
    # shutdown the master IP
3198
    master = self.cfg.GetMasterNode()
3199
    result = self.rpc.call_node_stop_master(master, False)
3200
    result.Raise("Could not disable the master role")
3201

    
3202
    try:
3203
      cluster = self.cfg.GetClusterInfo()
3204
      cluster.cluster_name = clustername
3205
      cluster.master_ip = ip
3206
      self.cfg.Update(cluster, feedback_fn)
3207

    
3208
      # update the known hosts file
3209
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3210
      node_list = self.cfg.GetOnlineNodeList()
3211
      try:
3212
        node_list.remove(master)
3213
      except ValueError:
3214
        pass
3215
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3216
    finally:
3217
      result = self.rpc.call_node_start_master(master, False, False)
3218
      msg = result.fail_msg
3219
      if msg:
3220
        self.LogWarning("Could not re-enable the master role on"
3221
                        " the master, please restart manually: %s", msg)
3222

    
3223
    return clustername
3224

    
3225

    
3226
class LUClusterSetParams(LogicalUnit):
3227
  """Change the parameters of the cluster.
3228

3229
  """
3230
  HPATH = "cluster-modify"
3231
  HTYPE = constants.HTYPE_CLUSTER
3232
  REQ_BGL = False
3233

    
3234
  def CheckArguments(self):
3235
    """Check parameters
3236

3237
    """
3238
    if self.op.uid_pool:
3239
      uidpool.CheckUidPool(self.op.uid_pool)
3240

    
3241
    if self.op.add_uids:
3242
      uidpool.CheckUidPool(self.op.add_uids)
3243

    
3244
    if self.op.remove_uids:
3245
      uidpool.CheckUidPool(self.op.remove_uids)
3246

    
3247
  def ExpandNames(self):
3248
    # FIXME: in the future maybe other cluster params won't require checking on
3249
    # all nodes to be modified.
3250
    self.needed_locks = {
3251
      locking.LEVEL_NODE: locking.ALL_SET,
3252
    }
3253
    self.share_locks[locking.LEVEL_NODE] = 1
3254

    
3255
  def BuildHooksEnv(self):
3256
    """Build hooks env.
3257

3258
    """
3259
    return {
3260
      "OP_TARGET": self.cfg.GetClusterName(),
3261
      "NEW_VG_NAME": self.op.vg_name,
3262
      }
3263

    
3264
  def BuildHooksNodes(self):
3265
    """Build hooks nodes.
3266

3267
    """
3268
    mn = self.cfg.GetMasterNode()
3269
    return ([mn], [mn])
3270

    
3271
  def CheckPrereq(self):
3272
    """Check prerequisites.
3273

3274
    This checks whether the given params don't conflict and
3275
    if the given volume group is valid.
3276

3277
    """
3278
    if self.op.vg_name is not None and not self.op.vg_name:
3279
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3280
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3281
                                   " instances exist", errors.ECODE_INVAL)
3282

    
3283
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3284
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3285
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3286
                                   " drbd-based instances exist",
3287
                                   errors.ECODE_INVAL)
3288

    
3289
    node_list = self.glm.list_owned(locking.LEVEL_NODE)
3290

    
3291
    # if vg_name not None, checks given volume group on all nodes
3292
    if self.op.vg_name:
3293
      vglist = self.rpc.call_vg_list(node_list)
3294
      for node in node_list:
3295
        msg = vglist[node].fail_msg
3296
        if msg:
3297
          # ignoring down node
3298
          self.LogWarning("Error while gathering data on node %s"
3299
                          " (ignoring node): %s", node, msg)
3300
          continue
3301
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3302
                                              self.op.vg_name,
3303
                                              constants.MIN_VG_SIZE)
3304
        if vgstatus:
3305
          raise errors.OpPrereqError("Error on node '%s': %s" %
3306
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3307

    
3308
    if self.op.drbd_helper:
3309
      # checks given drbd helper on all nodes
3310
      helpers = self.rpc.call_drbd_helper(node_list)
3311
      for node in node_list:
3312
        ninfo = self.cfg.GetNodeInfo(node)
3313
        if ninfo.offline:
3314
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3315
          continue
3316
        msg = helpers[node].fail_msg
3317
        if msg:
3318
          raise errors.OpPrereqError("Error checking drbd helper on node"
3319
                                     " '%s': %s" % (node, msg),
3320
                                     errors.ECODE_ENVIRON)
3321
        node_helper = helpers[node].payload
3322
        if node_helper != self.op.drbd_helper:
3323
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3324
                                     (node, node_helper), errors.ECODE_ENVIRON)
3325

    
3326
    self.cluster = cluster = self.cfg.GetClusterInfo()
3327
    # validate params changes
3328
    if self.op.beparams:
3329
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3330
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3331

    
3332
    if self.op.ndparams:
3333
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3334
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3335

    
3336
      # TODO: we need a more general way to handle resetting
3337
      # cluster-level parameters to default values
3338
      if self.new_ndparams["oob_program"] == "":
3339
        self.new_ndparams["oob_program"] = \
3340
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3341

    
3342
    if self.op.nicparams:
3343
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3344
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3345
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3346
      nic_errors = []
3347

    
3348
      # check all instances for consistency
3349
      for instance in self.cfg.GetAllInstancesInfo().values():
3350
        for nic_idx, nic in enumerate(instance.nics):
3351
          params_copy = copy.deepcopy(nic.nicparams)
3352
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3353

    
3354
          # check parameter syntax
3355
          try:
3356
            objects.NIC.CheckParameterSyntax(params_filled)
3357
          except errors.ConfigurationError, err:
3358
            nic_errors.append("Instance %s, nic/%d: %s" %
3359
                              (instance.name, nic_idx, err))
3360

    
3361
          # if we're moving instances to routed, check that they have an ip
3362
          target_mode = params_filled[constants.NIC_MODE]
3363
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3364
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3365
                              " address" % (instance.name, nic_idx))
3366
      if nic_errors:
3367
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3368
                                   "\n".join(nic_errors))
3369

    
3370
    # hypervisor list/parameters
3371
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3372
    if self.op.hvparams:
3373
      for hv_name, hv_dict in self.op.hvparams.items():
3374
        if hv_name not in self.new_hvparams:
3375
          self.new_hvparams[hv_name] = hv_dict
3376
        else:
3377
          self.new_hvparams[hv_name].update(hv_dict)
3378

    
3379
    # os hypervisor parameters
3380
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3381
    if self.op.os_hvp:
3382
      for os_name, hvs in self.op.os_hvp.items():
3383
        if os_name not in self.new_os_hvp:
3384
          self.new_os_hvp[os_name] = hvs
3385
        else:
3386
          for hv_name, hv_dict in hvs.items():
3387
            if hv_name not in self.new_os_hvp[os_name]:
3388
              self.new_os_hvp[os_name][hv_name] = hv_dict
3389
            else:
3390
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3391

    
3392
    # os parameters
3393
    self.new_osp = objects.FillDict(cluster.osparams, {})
3394
    if self.op.osparams:
3395
      for os_name, osp in self.op.osparams.items():
3396
        if os_name not in self.new_osp:
3397
          self.new_osp[os_name] = {}
3398

    
3399
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3400
                                                  use_none=True)
3401

    
3402
        if not self.new_osp[os_name]:
3403
          # we removed all parameters
3404
          del self.new_osp[os_name]
3405
        else:
3406
          # check the parameter validity (remote check)
3407
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3408
                         os_name, self.new_osp[os_name])
3409

    
3410
    # changes to the hypervisor list
3411
    if self.op.enabled_hypervisors is not None:
3412
      self.hv_list = self.op.enabled_hypervisors
3413
      for hv in self.hv_list:
3414
        # if the hypervisor doesn't already exist in the cluster
3415
        # hvparams, we initialize it to empty, and then (in both
3416
        # cases) we make sure to fill the defaults, as we might not
3417
        # have a complete defaults list if the hypervisor wasn't
3418
        # enabled before
3419
        if hv not in new_hvp:
3420
          new_hvp[hv] = {}
3421
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3422
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3423
    else:
3424
      self.hv_list = cluster.enabled_hypervisors
3425

    
3426
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3427
      # either the enabled list has changed, or the parameters have, validate
3428
      for hv_name, hv_params in self.new_hvparams.items():
3429
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3430
            (self.op.enabled_hypervisors and
3431
             hv_name in self.op.enabled_hypervisors)):
3432
          # either this is a new hypervisor, or its parameters have changed
3433
          hv_class = hypervisor.GetHypervisor(hv_name)
3434
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3435
          hv_class.CheckParameterSyntax(hv_params)
3436
          _CheckHVParams(self, node_list, hv_name, hv_params)
3437

    
3438
    if self.op.os_hvp:
3439
      # no need to check any newly-enabled hypervisors, since the
3440
      # defaults have already been checked in the above code-block
3441
      for os_name, os_hvp in self.new_os_hvp.items():
3442
        for hv_name, hv_params in os_hvp.items():
3443
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3444
          # we need to fill in the new os_hvp on top of the actual hv_p
3445
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3446
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3447
          hv_class = hypervisor.GetHypervisor(hv_name)
3448
          hv_class.CheckParameterSyntax(new_osp)
3449
          _CheckHVParams(self, node_list, hv_name, new_osp)
3450

    
3451
    if self.op.default_iallocator:
3452
      alloc_script = utils.FindFile(self.op.default_iallocator,
3453
                                    constants.IALLOCATOR_SEARCH_PATH,
3454
                                    os.path.isfile)
3455
      if alloc_script is None:
3456
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3457
                                   " specified" % self.op.default_iallocator,
3458
                                   errors.ECODE_INVAL)
3459

    
3460
  def Exec(self, feedback_fn):
3461
    """Change the parameters of the cluster.
3462

3463
    """
3464
    if self.op.vg_name is not None:
3465
      new_volume = self.op.vg_name
3466
      if not new_volume:
3467
        new_volume = None
3468
      if new_volume != self.cfg.GetVGName():
3469
        self.cfg.SetVGName(new_volume)
3470
      else:
3471
        feedback_fn("Cluster LVM configuration already in desired"
3472
                    " state, not changing")
3473
    if self.op.drbd_helper is not None:
3474
      new_helper = self.op.drbd_helper
3475
      if not new_helper:
3476
        new_helper = None
3477
      if new_helper != self.cfg.GetDRBDHelper():
3478
        self.cfg.SetDRBDHelper(new_helper)
3479
      else:
3480
        feedback_fn("Cluster DRBD helper already in desired state,"
3481
                    " not changing")
3482
    if self.op.hvparams:
3483
      self.cluster.hvparams = self.new_hvparams
3484
    if self.op.os_hvp:
3485
      self.cluster.os_hvp = self.new_os_hvp
3486
    if self.op.enabled_hypervisors is not None:
3487
      self.cluster.hvparams = self.new_hvparams
3488
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3489
    if self.op.beparams:
3490
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3491
    if self.op.nicparams:
3492
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3493
    if self.op.osparams:
3494
      self.cluster.osparams = self.new_osp
3495
    if self.op.ndparams:
3496
      self.cluster.ndparams = self.new_ndparams
3497

    
3498
    if self.op.candidate_pool_size is not None:
3499
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3500
      # we need to update the pool size here, otherwise the save will fail
3501
      _AdjustCandidatePool(self, [])
3502

    
3503
    if self.op.maintain_node_health is not None:
3504
      self.cluster.maintain_node_health = self.op.maintain_node_health
3505

    
3506
    if self.op.prealloc_wipe_disks is not None:
3507
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3508

    
3509
    if self.op.add_uids is not None:
3510
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3511

    
3512
    if self.op.remove_uids is not None:
3513
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3514

    
3515
    if self.op.uid_pool is not None:
3516
      self.cluster.uid_pool = self.op.uid_pool
3517

    
3518
    if self.op.default_iallocator is not None:
3519
      self.cluster.default_iallocator = self.op.default_iallocator
3520

    
3521
    if self.op.reserved_lvs is not None:
3522
      self.cluster.reserved_lvs = self.op.reserved_lvs
3523

    
3524
    def helper_os(aname, mods, desc):
3525
      desc += " OS list"
3526
      lst = getattr(self.cluster, aname)
3527
      for key, val in mods:
3528
        if key == constants.DDM_ADD:
3529
          if val in lst:
3530
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3531
          else:
3532
            lst.append(val)
3533
        elif key == constants.DDM_REMOVE:
3534
          if val in lst:
3535
            lst.remove(val)
3536
          else:
3537
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3538
        else:
3539
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
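    # Illustrative example (added; the OS name below is made up): with
    #   self.op.hidden_os == [(constants.DDM_ADD, "debian-image")]
    # the helper above would append "debian-image" to the cluster's hidden
    # OS list, while (constants.DDM_REMOVE, ...) entries would drop it.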
3540

    
3541
    if self.op.hidden_os:
3542
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3543

    
3544
    if self.op.blacklisted_os:
3545
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3546

    
3547
    if self.op.master_netdev:
3548
      master = self.cfg.GetMasterNode()
3549
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3550
                  self.cluster.master_netdev)
3551
      result = self.rpc.call_node_stop_master(master, False)
3552
      result.Raise("Could not disable the master ip")
3553
      feedback_fn("Changing master_netdev from %s to %s" %
3554
                  (self.cluster.master_netdev, self.op.master_netdev))
3555
      self.cluster.master_netdev = self.op.master_netdev
3556

    
3557
    self.cfg.Update(self.cluster, feedback_fn)
3558

    
3559
    if self.op.master_netdev:
3560
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3561
                  self.op.master_netdev)
3562
      result = self.rpc.call_node_start_master(master, False, False)
3563
      if result.fail_msg:
3564
        self.LogWarning("Could not re-enable the master ip on"
3565
                        " the master, please restart manually: %s",
3566
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Filenames must be unique
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)
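  # Rough shape of the returned tuple (added note, for orientation only;
  # exact contents depend on the cluster configuration and hypervisors):
  #   files_all     - e.g. known_hosts, the confd HMAC key, domain secret
  #   files_all_opt - e.g. the RAPI users file (may be absent everywhere)
  #   files_mc      - the cluster configuration file, only when not redist
  #   files_vm      - hypervisor-specific ancillary files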
3625

    
3626

    
3627
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3628
  """Distribute additional files which are part of the cluster configuration.
3629

3630
  ConfigWriter takes care of distributing the config and ssconf files, but
3631
  there are more files which should be distributed to all nodes. This function
3632
  makes sure those are copied.
3633

3634
  @param lu: calling logical unit
3635
  @param additional_nodes: list of nodes not in the config to distribute to
3636
  @type additional_vm: boolean
3637
  @param additional_vm: whether the additional nodes are vm-capable or not
3638

3639
  """
3640
  # Gather target nodes
3641
  cluster = lu.cfg.GetClusterInfo()
3642
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3643

    
3644
  online_nodes = lu.cfg.GetOnlineNodeList()
3645
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3646

    
3647
  if additional_nodes is not None:
3648
    online_nodes.extend(additional_nodes)
3649
    if additional_vm:
3650
      vm_nodes.extend(additional_nodes)
3651

    
3652
  # Never distribute to master node
3653
  for nodelist in [online_nodes, vm_nodes]:
3654
    if master_info.name in nodelist:
3655
      nodelist.remove(master_info.name)
3656

    
3657
  # Gather file lists
3658
  (files_all, files_all_opt, files_mc, files_vm) = \
3659
    _ComputeAncillaryFiles(cluster, True)
3660

    
3661
  # Never re-distribute configuration file from here
3662
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3663
              constants.CLUSTER_CONF_FILE in files_vm)
3664
  assert not files_mc, "Master candidates not handled in this function"
3665

    
3666
  filemap = [
3667
    (online_nodes, files_all),
3668
    (online_nodes, files_all_opt),
3669
    (vm_nodes, files_vm),
3670
    ]
3671

    
3672
  # Upload the files
3673
  for (node_list, files) in filemap:
3674
    for fname in files:
3675
      _UploadHelper(lu, node_list, fname)
3676

    
3677

    
3678
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
3701
  """Sleep and poll for an instance's disk to sync.
3702

3703
  """
3704
  if not instance.disks or disks is not None and not disks:
3705
    return True
3706

    
3707
  disks = _ExpandCheckDisks(instance, disks)
3708

    
3709
  if not oneshot:
3710
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3711

    
3712
  node = instance.primary_node
3713

    
3714
  for dev in disks:
3715
    lu.cfg.SetDiskID(dev, node)
3716

    
3717
  # TODO: Convert to utils.Retry
3718

    
3719
  retries = 0
3720
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3721
  while True:
3722
    max_time = 0
3723
    done = True
3724
    cumul_degraded = False
3725
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3726
    msg = rstats.fail_msg
3727
    if msg:
3728
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3729
      retries += 1
3730
      if retries >= 10:
3731
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3732
                                 " aborting." % node)
3733
      time.sleep(6)
3734
      continue
3735
    rstats = rstats.payload
3736
    retries = 0
3737
    for i, mstat in enumerate(rstats):
3738
      if mstat is None:
3739
        lu.LogWarning("Can't compute data for node %s/%s",
3740
                           node, disks[i].iv_name)
3741
        continue
3742

    
3743
      cumul_degraded = (cumul_degraded or
3744
                        (mstat.is_degraded and mstat.sync_percent is None))
3745
      if mstat.sync_percent is not None:
3746
        done = False
3747
        if mstat.estimated_time is not None:
3748
          rem_time = ("%s remaining (estimated)" %
3749
                      utils.FormatSeconds(mstat.estimated_time))
3750
          max_time = mstat.estimated_time
3751
        else:
3752
          rem_time = "no time estimate"
3753
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3754
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3755

    
3756
    # if we're done but degraded, let's do a few small retries, to
3757
    # make sure we see a stable and not transient situation; therefore
3758
    # we force restart of the loop
3759
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3760
      logging.info("Degraded disks found, %d retries left", degr_retries)
3761
      degr_retries -= 1
3762
      time.sleep(1)
3763
      continue
3764

    
3765
    if done or oneshot:
3766
      break
3767

    
3768
    time.sleep(min(60, max_time))
3769

    
3770
  if done:
3771
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3772
  return not cumul_degraded
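  # Usage note (added; a sketch, not a prescription): callers typically treat
  # a False return as "the disks never left the degraded state", e.g.
  #   if not _WaitForSync(self, instance):
  #     ...abort or roll back the operation...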
3773

    
3774

    
3775
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3776
  """Check that mirrors are not degraded.
3777

3778
  The ldisk parameter, if True, will change the test from the
3779
  is_degraded attribute (which represents overall non-ok status for
3780
  the device(s)) to the ldisk (representing the local storage status).
3781

3782
  """
3783
  lu.cfg.SetDiskID(dev, node)
3784

    
3785
  result = True
3786

    
3787
  if on_primary or dev.AssembleOnSecondary():
3788
    rstats = lu.rpc.call_blockdev_find(node, dev)
3789
    msg = rstats.fail_msg
3790
    if msg:
3791
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3792
      result = False
3793
    elif not rstats.payload:
3794
      lu.LogWarning("Can't find disk on node %s", node)
3795
      result = False
3796
    else:
3797
      if ldisk:
3798
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3799
      else:
3800
        result = result and not rstats.payload.is_degraded
3801

    
3802
  if dev.children:
3803
    for child in dev.children:
3804
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3805

    
3806
  return result
3807

    
3808

    
3809
class LUOobCommand(NoHooksLU):
3810
  """Logical unit for OOB handling.
3811

3812
  """
3813
  REQ_BGL = False
3814
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3815

    
3816
  def ExpandNames(self):
3817
    """Gather locks we need.
3818

3819
    """
3820
    if self.op.node_names:
3821
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3822
      lock_names = self.op.node_names
3823
    else:
3824
      lock_names = locking.ALL_SET
3825

    
3826
    self.needed_locks = {
3827
      locking.LEVEL_NODE: lock_names,
3828
      }
3829

    
3830
  def CheckPrereq(self):
3831
    """Check prerequisites.
3832

3833
    This checks:
3834
     - the node exists in the configuration
3835
     - OOB is supported
3836

3837
    Any errors are signaled by raising errors.OpPrereqError.
3838

3839
    """
3840
    self.nodes = []
3841
    self.master_node = self.cfg.GetMasterNode()
3842

    
3843
    assert self.op.power_delay >= 0.0
3844

    
3845
    if self.op.node_names:
3846
      if (self.op.command in self._SKIP_MASTER and
3847
          self.master_node in self.op.node_names):
3848
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3849
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3850

    
3851
        if master_oob_handler:
3852
          additional_text = ("run '%s %s %s' if you want to operate on the"
3853
                             " master regardless") % (master_oob_handler,
3854
                                                      self.op.command,
3855
                                                      self.master_node)
3856
        else:
3857
          additional_text = "it does not support out-of-band operations"
3858

    
3859
        raise errors.OpPrereqError(("Operating on the master node %s is not"
3860
                                    " allowed for %s; %s") %
3861
                                   (self.master_node, self.op.command,
3862
                                    additional_text), errors.ECODE_INVAL)
3863
    else:
3864
      self.op.node_names = self.cfg.GetNodeList()
3865
      if self.op.command in self._SKIP_MASTER:
3866
        self.op.node_names.remove(self.master_node)
3867

    
3868
    if self.op.command in self._SKIP_MASTER:
3869
      assert self.master_node not in self.op.node_names
3870

    
3871
    for node_name in self.op.node_names:
3872
      node = self.cfg.GetNodeInfo(node_name)
3873

    
3874
      if node is None:
3875
        raise errors.OpPrereqError("Node %s not found" % node_name,
3876
                                   errors.ECODE_NOENT)
3877
      else:
3878
        self.nodes.append(node)
3879

    
3880
      if (not self.op.ignore_status and
3881
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3882
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3883
                                    " not marked offline") % node_name,
3884
                                   errors.ECODE_STATE)
3885

    
3886
  def Exec(self, feedback_fn):
3887
    """Execute OOB and return result if we expect any.
3888

3889
    """
3890
    master_node = self.master_node
3891
    ret = []
3892

    
3893
    for idx, node in enumerate(utils.NiceSort(self.nodes,
3894
                                              key=lambda node: node.name)):
3895
      node_entry = [(constants.RS_NORMAL, node.name)]
3896
      ret.append(node_entry)
3897

    
3898
      oob_program = _SupportsOob(self.cfg, node)
3899

    
3900
      if not oob_program:
3901
        node_entry.append((constants.RS_UNAVAIL, None))
3902
        continue
3903

    
3904
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3905
                   self.op.command, oob_program, node.name)
3906
      result = self.rpc.call_run_oob(master_node, oob_program,
3907
                                     self.op.command, node.name,
3908
                                     self.op.timeout)
3909

    
3910
      if result.fail_msg:
3911
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3912
                        node.name, result.fail_msg)
3913
        node_entry.append((constants.RS_NODATA, None))
3914
      else:
3915
        try:
3916
          self._CheckPayload(result)
3917
        except errors.OpExecError, err:
3918
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
3919
                          node.name, err)
3920
          node_entry.append((constants.RS_NODATA, None))
3921
        else:
3922
          if self.op.command == constants.OOB_HEALTH:
3923
            # For health we should log important events
3924
            for item, status in result.payload:
3925
              if status in [constants.OOB_STATUS_WARNING,
3926
                            constants.OOB_STATUS_CRITICAL]:
3927
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
3928
                                item, node.name, status)
3929

    
3930
          if self.op.command == constants.OOB_POWER_ON:
3931
            node.powered = True
3932
          elif self.op.command == constants.OOB_POWER_OFF:
3933
            node.powered = False
3934
          elif self.op.command == constants.OOB_POWER_STATUS:
3935
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3936
            if powered != node.powered:
3937
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3938
                               " match actual power state (%s)"), node.powered,
3939
                              node.name, powered)
3940

    
3941
          # For configuration changing commands we should update the node
3942
          if self.op.command in (constants.OOB_POWER_ON,
3943
                                 constants.OOB_POWER_OFF):
3944
            self.cfg.Update(node, feedback_fn)
3945

    
3946
          node_entry.append((constants.RS_NORMAL, result.payload))
3947

    
3948
          if (self.op.command == constants.OOB_POWER_ON and
3949
              idx < len(self.nodes) - 1):
3950
            time.sleep(self.op.power_delay)
3951

    
3952
    return ret
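    # Shape of the result (illustrative values, added note): one entry per
    # node, each a list of (status, data) tuples, e.g. for a power-status
    # command:
    #   [[(constants.RS_NORMAL, "node1.example.com"),
    #     (constants.RS_NORMAL, {constants.OOB_POWER_STATUS_POWERED: True})],
    #    [(constants.RS_NORMAL, "node2.example.com"),
    #     (constants.RS_UNAVAIL, None)]]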
3953

    
3954
  def _CheckPayload(self, result):
3955
    """Checks if the payload is valid.
3956

3957
    @param result: RPC result
3958
    @raises errors.OpExecError: If payload is not valid
3959

3960
    """
3961
    errs = []
3962
    if self.op.command == constants.OOB_HEALTH:
3963
      if not isinstance(result.payload, list):
3964
        errs.append("command 'health' is expected to return a list but got %s" %
3965
                    type(result.payload))
3966
      else:
3967
        for item, status in result.payload:
3968
          if status not in constants.OOB_STATUSES:
3969
            errs.append("health item '%s' has invalid status '%s'" %
3970
                        (item, status))
3971

    
3972
    if self.op.command == constants.OOB_POWER_STATUS:
3973
      if not isinstance(result.payload, dict):
3974
        errs.append("power-status is expected to return a dict but got %s" %
3975
                    type(result.payload))
3976

    
3977
    if self.op.command in [
3978
        constants.OOB_POWER_ON,
3979
        constants.OOB_POWER_OFF,
3980
        constants.OOB_POWER_CYCLE,
3981
        ]:
3982
      if result.payload is not None:
3983
        errs.append("%s is expected to not return payload but got '%s'" %
3984
                    (self.op.command, result.payload))
3985

    
3986
    if errs:
3987
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3988
                               utils.CommaJoin(errs))
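    # For reference (added note): a valid "power-status" payload is a dict
    # keyed by constants.OOB_POWER_STATUS_POWERED, while "health" returns a
    # list of (item, status) pairs with statuses from constants.OOB_STATUSES;
    # power-on/off/cycle are expected to return no payload at all.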
3989

    
3990
class _OsQuery(_QueryBase):
3991
  FIELDS = query.OS_FIELDS
3992

    
3993
  def ExpandNames(self, lu):
3994
    # Lock all nodes in shared mode
3995
    # Temporary removal of locks, should be reverted later
3996
    # TODO: reintroduce locks when they are lighter-weight
3997
    lu.needed_locks = {}
3998
    #self.share_locks[locking.LEVEL_NODE] = 1
3999
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4000

    
4001
    # The following variables interact with _QueryBase._GetNames
4002
    if self.names:
4003
      self.wanted = self.names
4004
    else:
4005
      self.wanted = locking.ALL_SET
4006

    
4007
    self.do_locking = self.use_locking
4008

    
4009
  def DeclareLocks(self, lu, level):
4010
    pass
4011

    
4012
  @staticmethod
4013
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary
4015

4016
    @param rlist: a map with node names as keys and OS objects as values
4017

4018
    @rtype: dict
4019
    @return: a dictionary with osnames as keys and as value another
4020
        map, with nodes as keys and tuples of (path, status, diagnose,
4021
        variants, parameters, api_versions) as values, eg::
4022

4023
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4024
                                     (/srv/..., False, "invalid api")],
4025
                           "node2": [(/srv/..., True, "", [], [])]}
4026
          }
4027

4028
    """
4029
    all_os = {}
4030
    # we build here the list of nodes that didn't fail the RPC (at RPC
4031
    # level), so that nodes with a non-responding node daemon don't
4032
    # make all OSes invalid
4033
    good_nodes = [node_name for node_name in rlist
4034
                  if not rlist[node_name].fail_msg]
4035
    for node_name, nr in rlist.items():
4036
      if nr.fail_msg or not nr.payload:
4037
        continue
4038
      for (name, path, status, diagnose, variants,
4039
           params, api_versions) in nr.payload:
4040
        if name not in all_os:
4041
          # build a list of nodes for this os containing empty lists
4042
          # for each node in node_list
4043
          all_os[name] = {}
4044
          for nname in good_nodes:
4045
            all_os[name][nname] = []
4046
        # convert params from [name, help] to (name, help)
4047
        params = [tuple(v) for v in params]
4048
        all_os[name][node_name].append((path, status, diagnose,
4049
                                        variants, params, api_versions))
4050
    return all_os
4051

    
4052
  def _GetQueryData(self, lu):
4053
    """Computes the list of nodes and their attributes.
4054

4055
    """
4056
    # Locking is not used
4057
    assert not (compat.any(lu.glm.is_owned(level)
4058
                           for level in locking.LEVELS
4059
                           if level != locking.LEVEL_CLUSTER) or
4060
                self.do_locking or self.use_locking)
4061

    
4062
    valid_nodes = [node.name
4063
                   for node in lu.cfg.GetAllNodesInfo().values()
4064
                   if not node.offline and node.vm_capable]
4065
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4066
    cluster = lu.cfg.GetClusterInfo()
4067

    
4068
    data = {}
4069

    
4070
    for (os_name, os_data) in pol.items():
4071
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4072
                          hidden=(os_name in cluster.hidden_os),
4073
                          blacklisted=(os_name in cluster.blacklisted_os))
4074

    
4075
      variants = set()
4076
      parameters = set()
4077
      api_versions = set()
4078

    
4079
      for idx, osl in enumerate(os_data.values()):
4080
        info.valid = bool(info.valid and osl and osl[0][1])
4081
        if not info.valid:
4082
          break
4083

    
4084
        (node_variants, node_params, node_api) = osl[0][3:6]
4085
        if idx == 0:
4086
          # First entry
4087
          variants.update(node_variants)
4088
          parameters.update(node_params)
4089
          api_versions.update(node_api)
4090
        else:
4091
          # Filter out inconsistent values
4092
          variants.intersection_update(node_variants)
4093
          parameters.intersection_update(node_params)
4094
          api_versions.intersection_update(node_api)
4095

    
4096
      info.variants = list(variants)
4097
      info.parameters = list(parameters)
4098
      info.api_versions = list(api_versions)
4099

    
4100
      data[os_name] = info
4101

    
4102
    # Prepare data in requested order
4103
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4104
            if name in data]
4105

    
4106

    
4107
class LUOsDiagnose(NoHooksLU):
4108
  """Logical unit for OS diagnose/query.
4109

4110
  """
4111
  REQ_BGL = False
4112

    
4113
  @staticmethod
4114
  def _BuildFilter(fields, names):
4115
    """Builds a filter for querying OSes.
4116

4117
    """
4118
    name_filter = qlang.MakeSimpleFilter("name", names)
4119

    
4120
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4121
    # respective field is not requested
4122
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4123
                     for fname in ["hidden", "blacklisted"]
4124
                     if fname not in fields]
4125
    if "valid" not in fields:
4126
      status_filter.append([qlang.OP_TRUE, "valid"])
4127

    
4128
    if status_filter:
4129
      status_filter.insert(0, qlang.OP_AND)
4130
    else:
4131
      status_filter = None
4132

    
4133
    if name_filter and status_filter:
4134
      return [qlang.OP_AND, name_filter, status_filter]
4135
    elif name_filter:
4136
      return name_filter
4137
    else:
4138
      return status_filter
4139

    
4140
  def CheckArguments(self):
4141
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4142
                       self.op.output_fields, False)
4143

    
4144
  def ExpandNames(self):
4145
    self.oq.ExpandNames(self)
4146

    
4147
  def Exec(self, feedback_fn):
4148
    return self.oq.OldStyleQuery(self)
4149

    
4150

    
4151
class LUNodeRemove(LogicalUnit):
4152
  """Logical unit for removing a node.
4153

4154
  """
4155
  HPATH = "node-remove"
4156
  HTYPE = constants.HTYPE_NODE
4157

    
4158
  def BuildHooksEnv(self):
4159
    """Build hooks env.
4160

4161
    This doesn't run on the target node in the pre phase as a failed
4162
    node would then be impossible to remove.
4163

4164
    """
4165
    return {
4166
      "OP_TARGET": self.op.node_name,
4167
      "NODE_NAME": self.op.node_name,
4168
      }
4169

    
4170
  def BuildHooksNodes(self):
4171
    """Build hooks nodes.
4172

4173
    """
4174
    all_nodes = self.cfg.GetNodeList()
4175
    try:
4176
      all_nodes.remove(self.op.node_name)
4177
    except ValueError:
4178
      logging.warning("Node '%s', which is about to be removed, was not found"
4179
                      " in the list of all nodes", self.op.node_name)
4180
    return (all_nodes, all_nodes)
4181

    
4182
  def CheckPrereq(self):
4183
    """Check prerequisites.
4184

4185
    This checks:
4186
     - the node exists in the configuration
4187
     - it does not have primary or secondary instances
4188
     - it's not the master
4189

4190
    Any errors are signaled by raising errors.OpPrereqError.
4191

4192
    """
4193
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4194
    node = self.cfg.GetNodeInfo(self.op.node_name)
4195
    assert node is not None
4196

    
4197
    instance_list = self.cfg.GetInstanceList()
4198

    
4199
    masternode = self.cfg.GetMasterNode()
4200
    if node.name == masternode:
4201
      raise errors.OpPrereqError("Node is the master node, failover to another"
4202
                                 " node is required", errors.ECODE_INVAL)
4203

    
4204
    for instance_name in instance_list:
4205
      instance = self.cfg.GetInstanceInfo(instance_name)
4206
      if node.name in instance.all_nodes:
4207
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4208
                                   " please remove first" % instance_name,
4209
                                   errors.ECODE_INVAL)
4210
    self.op.node_name = node.name
4211
    self.node = node
4212

    
4213
  def Exec(self, feedback_fn):
4214
    """Removes the node from the cluster.
4215

4216
    """
4217
    node = self.node
4218
    logging.info("Stopping the node daemon and removing configs from node %s",
4219
                 node.name)
4220

    
4221
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
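    # This flag is forwarded to the node_leave_cluster RPC below;
    # presumably the leaving node only touches its SSH configuration
    # when the cluster manages the SSH setup.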
4222

    
4223
    # Promote nodes to master candidate as needed
4224
    _AdjustCandidatePool(self, exceptions=[node.name])
4225
    self.context.RemoveNode(node.name)
4226

    
4227
    # Run post hooks on the node before it's removed
4228
    _RunPostHook(self, node.name)
4229

    
4230
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4231
    msg = result.fail_msg
4232
    if msg:
4233
      self.LogWarning("Errors encountered on the remote node while leaving"
4234
                      " the cluster: %s", msg)
4235

    
4236
    # Remove node from our /etc/hosts
4237
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4238
      master_node = self.cfg.GetMasterNode()
4239
      result = self.rpc.call_etc_hosts_modify(master_node,
4240
                                              constants.ETC_HOSTS_REMOVE,
4241
                                              node.name, None)
4242
      result.Raise("Can't update hosts file with new host data")
4243
      _RedistributeAncillaryFiles(self)
4244

    
4245

    
4246
class _NodeQuery(_QueryBase):
4247
  FIELDS = query.NODE_FIELDS
4248

    
4249
  def ExpandNames(self, lu):
4250
    lu.needed_locks = {}
4251
    lu.share_locks[locking.LEVEL_NODE] = 1
4252

    
4253
    if self.names:
4254
      self.wanted = _GetWantedNodes(lu, self.names)
4255
    else:
4256
      self.wanted = locking.ALL_SET
4257

    
4258
    self.do_locking = (self.use_locking and
4259
                       query.NQ_LIVE in self.requested_data)
4260

    
4261
    if self.do_locking:
4262
      # if we don't request only static fields, we need to lock the nodes
4263
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4264

    
4265
  def DeclareLocks(self, lu, level):
4266
    pass
4267

    
4268
  def _GetQueryData(self, lu):
4269
    """Computes the list of nodes and their attributes.
4270

4271
    """
4272
    all_info = lu.cfg.GetAllNodesInfo()
4273

    
4274
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4275

    
4276
    # Gather data as requested
4277
    if query.NQ_LIVE in self.requested_data:
4278
      # filter out non-vm_capable nodes
4279
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4280

    
4281
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4282
                                        lu.cfg.GetHypervisorType())
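      # Only nodes that answered and returned a payload end up in
      # live_data; failed or empty results are simply left out.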
4283
      live_data = dict((name, nresult.payload)
4284
                       for (name, nresult) in node_data.items()
4285
                       if not nresult.fail_msg and nresult.payload)
4286
    else:
4287
      live_data = None
4288

    
4289
    if query.NQ_INST in self.requested_data:
4290
      node_to_primary = dict([(name, set()) for name in nodenames])
4291
      node_to_secondary = dict([(name, set()) for name in nodenames])
4292

    
4293
      inst_data = lu.cfg.GetAllInstancesInfo()
4294

    
4295
      for inst in inst_data.values():
4296
        if inst.primary_node in node_to_primary:
4297
          node_to_primary[inst.primary_node].add(inst.name)
4298
        for secnode in inst.secondary_nodes:
4299
          if secnode in node_to_secondary:
4300
            node_to_secondary[secnode].add(inst.name)
4301
    else:
4302
      node_to_primary = None
4303
      node_to_secondary = None
4304

    
4305
    if query.NQ_OOB in self.requested_data:
4306
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4307
                         for name, node in all_info.iteritems())
4308
    else:
4309
      oob_support = None
4310

    
4311
    if query.NQ_GROUP in self.requested_data:
4312
      groups = lu.cfg.GetAllNodeGroupsInfo()
4313
    else:
4314
      groups = {}
4315

    
4316
    return query.NodeQueryData([all_info[name] for name in nodenames],
4317
                               live_data, lu.cfg.GetMasterNode(),
4318
                               node_to_primary, node_to_secondary, groups,
4319
                               oob_support, lu.cfg.GetClusterInfo())
4320

    
4321

    
4322
class LUNodeQuery(NoHooksLU):
4323
  """Logical unit for querying nodes.
4324

4325
  """
4326
  # pylint: disable-msg=W0142
4327
  REQ_BGL = False
4328

    
4329
  def CheckArguments(self):
4330
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4331
                         self.op.output_fields, self.op.use_locking)
4332

    
4333
  def ExpandNames(self):
4334
    self.nq.ExpandNames(self)
4335

    
4336
  def Exec(self, feedback_fn):
4337
    return self.nq.OldStyleQuery(self)
4338

    
4339

    
4340
class LUNodeQueryvols(NoHooksLU):
4341
  """Logical unit for getting volumes on node(s).
4342

4343
  """
4344
  REQ_BGL = False
4345
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4346
  _FIELDS_STATIC = utils.FieldSet("node")
4347

    
4348
  def CheckArguments(self):
4349
    _CheckOutputFields(static=self._FIELDS_STATIC,
4350
                       dynamic=self._FIELDS_DYNAMIC,
4351
                       selected=self.op.output_fields)
4352

    
4353
  def ExpandNames(self):
4354
    self.needed_locks = {}
4355
    self.share_locks[locking.LEVEL_NODE] = 1
4356
    if not self.op.nodes:
4357
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4358
    else:
4359
      self.needed_locks[locking.LEVEL_NODE] = \
4360
        _GetWantedNodes(self, self.op.nodes)
4361

    
4362
  def Exec(self, feedback_fn):
4363
    """Computes the list of nodes and their attributes.
4364

4365
    """
4366
    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
4367
    volumes = self.rpc.call_node_volumes(nodenames)
4368

    
4369
    ilist = self.cfg.GetAllInstancesInfo()
4370
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
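    # vol2inst maps (node name, "vg/lv") tuples to instance names and is
    # used below to fill in the "instance" output field.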
4371

    
4372
    output = []
4373
    for node in nodenames:
4374
      nresult = volumes[node]
4375
      if nresult.offline:
4376
        continue
4377
      msg = nresult.fail_msg
4378
      if msg:
4379
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4380
        continue
4381

    
4382
      node_vols = sorted(nresult.payload,
4383
                         key=operator.itemgetter("dev"))
4384

    
4385
      for vol in node_vols:
4386
        node_output = []
4387
        for field in self.op.output_fields:
4388
          if field == "node":
4389
            val = node
4390
          elif field == "phys":
4391
            val = vol["dev"]
4392
          elif field == "vg":
4393
            val = vol["vg"]
4394
          elif field == "name":
4395
            val = vol["name"]
4396
          elif field == "size":
4397
            val = int(float(vol["size"]))
4398
          elif field == "instance":
4399
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4400
          else:
4401
            raise errors.ParameterError(field)
4402
          node_output.append(str(val))
4403

    
4404
        output.append(node_output)
4405

    
4406
    return output
4407

    
4408

    
4409
class LUNodeQueryStorage(NoHooksLU):
4410
  """Logical unit for getting information on storage units on node(s).
4411

4412
  """
4413
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4414
  REQ_BGL = False
4415

    
4416
  def CheckArguments(self):
4417
    _CheckOutputFields(static=self._FIELDS_STATIC,
4418
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4419
                       selected=self.op.output_fields)
4420

    
4421
  def ExpandNames(self):
4422
    self.needed_locks = {}
4423
    self.share_locks[locking.LEVEL_NODE] = 1
4424

    
4425
    if self.op.nodes:
4426
      self.needed_locks[locking.LEVEL_NODE] = \
4427
        _GetWantedNodes(self, self.op.nodes)
4428
    else:
4429
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4430

    
4431
  def Exec(self, feedback_fn):
4432
    """Computes the list of nodes and their attributes.
4433

4434
    """
4435
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
4436

    
4437
    # Always get name to sort by
4438
    if constants.SF_NAME in self.op.output_fields:
4439
      fields = self.op.output_fields[:]
4440
    else:
4441
      fields = [constants.SF_NAME] + self.op.output_fields
4442

    
4443
    # Never ask for node or type as it's only known to the LU
4444
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4445
      while extra in fields:
4446
        fields.remove(extra)
4447

    
4448
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
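    # Map each field name to its column index in the rows returned by
    # the storage_list RPC; the rows are assumed to follow the order of
    # the "fields" list built above.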
4449
    name_idx = field_idx[constants.SF_NAME]
4450

    
4451
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4452
    data = self.rpc.call_storage_list(self.nodes,
4453
                                      self.op.storage_type, st_args,
4454
                                      self.op.name, fields)
4455

    
4456
    result = []
4457

    
4458
    for node in utils.NiceSort(self.nodes):
4459
      nresult = data[node]
4460
      if nresult.offline:
4461
        continue
4462

    
4463
      msg = nresult.fail_msg
4464
      if msg:
4465
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4466
        continue
4467

    
4468
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4469

    
4470
      for name in utils.NiceSort(rows.keys()):
4471
        row = rows[name]
4472

    
4473
        out = []
4474

    
4475
        for field in self.op.output_fields:
4476
          if field == constants.SF_NODE:
4477
            val = node
4478
          elif field == constants.SF_TYPE:
4479
            val = self.op.storage_type
4480
          elif field in field_idx:
4481
            val = row[field_idx[field]]
4482
          else:
4483
            raise errors.ParameterError(field)
4484

    
4485
          out.append(val)
4486

    
4487
        result.append(out)
4488

    
4489
    return result
4490

    
4491

    
4492
class _InstanceQuery(_QueryBase):
4493
  FIELDS = query.INSTANCE_FIELDS
4494

    
4495
  def ExpandNames(self, lu):
4496
    lu.needed_locks = {}
4497
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
4498
    lu.share_locks[locking.LEVEL_NODE] = 1
4499

    
4500
    if self.names:
4501
      self.wanted = _GetWantedInstances(lu, self.names)
4502
    else:
4503
      self.wanted = locking.ALL_SET
4504

    
4505
    self.do_locking = (self.use_locking and
4506
                       query.IQ_LIVE in self.requested_data)
4507
    if self.do_locking:
4508
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4509
      lu.needed_locks[locking.LEVEL_NODE] = []
4510
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4511

    
4512
  def DeclareLocks(self, lu, level):
4513
    if level == locking.LEVEL_NODE and self.do_locking:
4514
      lu._LockInstancesNodes() # pylint: disable-msg=W0212
4515

    
4516
  def _GetQueryData(self, lu):
4517
    """Computes the list of instances and their attributes.
4518

4519
    """
4520
    cluster = lu.cfg.GetClusterInfo()
4521
    all_info = lu.cfg.GetAllInstancesInfo()
4522

    
4523
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4524

    
4525
    instance_list = [all_info[name] for name in instance_names]
4526
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4527
                                        for inst in instance_list)))
4528
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4529
    bad_nodes = []
4530
    offline_nodes = []
4531
    wrongnode_inst = set()
4532

    
4533
    # Gather data as requested
4534
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4535
      live_data = {}
4536
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
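      # The loop below sorts nodes into offline/bad buckets, merges the
      # live instance data they report, and records instances that show
      # up on a node other than their configured primary in
      # wrongnode_inst.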
4537
      for name in nodes:
4538
        result = node_data[name]
4539
        if result.offline:
4540
          # offline nodes will be in both lists
4541
          assert result.fail_msg
4542
          offline_nodes.append(name)
4543
        if result.fail_msg:
4544
          bad_nodes.append(name)
4545
        elif result.payload:
4546
          for inst in result.payload:
4547
            if inst in all_info:
4548
              if all_info[inst].primary_node == name:
4549
                live_data.update(result.payload)
4550
              else:
4551
                wrongnode_inst.add(inst)
4552
            else:
4553
              # orphan instance; we don't list it here as we don't
4554
              # handle this case yet in the output of instance listing
4555
              logging.warning("Orphan instance '%s' found on node %s",
4556
                              inst, name)
4557
        # else no instance is alive
4558
    else:
4559
      live_data = {}
4560

    
4561
    if query.IQ_DISKUSAGE in self.requested_data:
4562
      disk_usage = dict((inst.name,
4563
                         _ComputeDiskSize(inst.disk_template,
4564
                                          [{constants.IDISK_SIZE: disk.size}
4565
                                           for disk in inst.disks]))
4566
                        for inst in instance_list)
4567
    else:
4568
      disk_usage = None
4569

    
4570
    if query.IQ_CONSOLE in self.requested_data:
4571
      consinfo = {}
4572
      for inst in instance_list:
4573
        if inst.name in live_data:
4574
          # Instance is running
4575
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4576
        else:
4577
          consinfo[inst.name] = None
4578
      assert set(consinfo.keys()) == set(instance_names)
4579
    else:
4580
      consinfo = None
4581

    
4582
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4583
                                   disk_usage, offline_nodes, bad_nodes,
4584
                                   live_data, wrongnode_inst, consinfo)
4585

    
4586

    
4587
class LUQuery(NoHooksLU):
4588
  """Query for resources/items of a certain kind.
4589

4590
  """
4591
  # pylint: disable-msg=W0142
4592
  REQ_BGL = False
4593

    
4594
  def CheckArguments(self):
4595
    qcls = _GetQueryImplementation(self.op.what)
4596

    
4597
    self.impl = qcls(self.op.filter, self.op.fields, False)
4598

    
4599
  def ExpandNames(self):
4600
    self.impl.ExpandNames(self)
4601

    
4602
  def DeclareLocks(self, level):
4603
    self.impl.DeclareLocks(self, level)
4604

    
4605
  def Exec(self, feedback_fn):
4606
    return self.impl.NewStyleQuery(self)
4607

    
4608

    
4609
class LUQueryFields(NoHooksLU):
4610
  """Query for resources/items of a certain kind.
4611

4612
  """
4613
  # pylint: disable-msg=W0142
4614
  REQ_BGL = False
4615

    
4616
  def CheckArguments(self):
4617
    self.qcls = _GetQueryImplementation(self.op.what)
4618

    
4619
  def ExpandNames(self):
4620
    self.needed_locks = {}
4621

    
4622
  def Exec(self, feedback_fn):
4623
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4624

    
4625

    
4626
class LUNodeModifyStorage(NoHooksLU):
4627
  """Logical unit for modifying a storage volume on a node.
4628

4629
  """
4630
  REQ_BGL = False
4631

    
4632
  def CheckArguments(self):
4633
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4634

    
4635
    storage_type = self.op.storage_type
4636

    
4637
    try:
4638
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4639
    except KeyError:
4640
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4641
                                 " modified" % storage_type,
4642
                                 errors.ECODE_INVAL)
4643

    
4644
    diff = set(self.op.changes.keys()) - modifiable
4645
    if diff:
4646
      raise errors.OpPrereqError("The following fields can not be modified for"
4647
                                 " storage units of type '%s': %r" %
4648
                                 (storage_type, list(diff)),
4649
                                 errors.ECODE_INVAL)
4650

    
4651
  def ExpandNames(self):
4652
    self.needed_locks = {
4653
      locking.LEVEL_NODE: self.op.node_name,
4654
      }
4655

    
4656
  def Exec(self, feedback_fn):
4657
    """Computes the list of nodes and their attributes.
4658

4659
    """
4660
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4661
    result = self.rpc.call_storage_modify(self.op.node_name,
4662
                                          self.op.storage_type, st_args,
4663
                                          self.op.name, self.op.changes)
4664
    result.Raise("Failed to modify storage unit '%s' on %s" %
4665
                 (self.op.name, self.op.node_name))
4666

    
4667

    
4668
class LUNodeAdd(LogicalUnit):
4669
  """Logical unit for adding node to the cluster.
4670

4671
  """
4672
  HPATH = "node-add"
4673
  HTYPE = constants.HTYPE_NODE
4674
  _NFLAGS = ["master_capable", "vm_capable"]
4675

    
4676
  def CheckArguments(self):
4677
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4678
    # validate/normalize the node name
4679
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4680
                                         family=self.primary_ip_family)
4681
    self.op.node_name = self.hostname.name
4682

    
4683
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4684
      raise errors.OpPrereqError("Cannot readd the master node",
4685
                                 errors.ECODE_STATE)
4686

    
4687
    if self.op.readd and self.op.group:
4688
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4689
                                 " being readded", errors.ECODE_INVAL)
4690

    
4691
  def BuildHooksEnv(self):
4692
    """Build hooks env.
4693

4694
    This will run on all nodes before, and on all nodes + the new node after.
4695

4696
    """
4697
    return {
4698
      "OP_TARGET": self.op.node_name,
4699
      "NODE_NAME": self.op.node_name,
4700
      "NODE_PIP": self.op.primary_ip,
4701
      "NODE_SIP": self.op.secondary_ip,
4702
      "MASTER_CAPABLE": str(self.op.master_capable),
4703
      "VM_CAPABLE": str(self.op.vm_capable),
4704
      }
4705

    
4706
  def BuildHooksNodes(self):
4707
    """Build hooks nodes.
4708

4709
    """
4710
    # Exclude added node
4711
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4712
    post_nodes = pre_nodes + [self.op.node_name, ]
4713

    
4714
    return (pre_nodes, post_nodes)
4715

    
4716
  def CheckPrereq(self):
4717
    """Check prerequisites.
4718

4719
    This checks:
4720
     - the new node is not already in the config
4721
     - it is resolvable
4722
     - its parameters (single/dual homed) matches the cluster
4723

4724
    Any errors are signaled by raising errors.OpPrereqError.
4725

4726
    """
4727
    cfg = self.cfg
4728
    hostname = self.hostname
4729
    node = hostname.name
4730
    primary_ip = self.op.primary_ip = hostname.ip
4731
    if self.op.secondary_ip is None:
4732
      if self.primary_ip_family == netutils.IP6Address.family:
4733
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4734
                                   " IPv4 address must be given as secondary",
4735
                                   errors.ECODE_INVAL)
4736
      self.op.secondary_ip = primary_ip
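      # (single-homed fallback: with no secondary IP given, the primary
      # IP is reused as the secondary one)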
4737

    
4738
    secondary_ip = self.op.secondary_ip
4739
    if not netutils.IP4Address.IsValid(secondary_ip):
4740
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4741
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4742

    
4743
    node_list = cfg.GetNodeList()
4744
    if not self.op.readd and node in node_list:
4745
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4746
                                 node, errors.ECODE_EXISTS)
4747
    elif self.op.readd and node not in node_list:
4748
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4749
                                 errors.ECODE_NOENT)
4750

    
4751
    self.changed_primary_ip = False
4752

    
4753
    for existing_node_name in node_list:
4754
      existing_node = cfg.GetNodeInfo(existing_node_name)
4755

    
4756
      if self.op.readd and node == existing_node_name:
4757
        if existing_node.secondary_ip != secondary_ip:
4758
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4759
                                     " address configuration as before",
4760
                                     errors.ECODE_INVAL)
4761
        if existing_node.primary_ip != primary_ip:
4762
          self.changed_primary_ip = True
4763

    
4764
        continue
4765

    
4766
      if (existing_node.primary_ip == primary_ip or
4767
          existing_node.secondary_ip == primary_ip or
4768
          existing_node.primary_ip == secondary_ip or
4769
          existing_node.secondary_ip == secondary_ip):
4770
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4771
                                   " existing node %s" % existing_node.name,
4772
                                   errors.ECODE_NOTUNIQUE)
4773

    
4774
    # After this 'if' block, None is no longer a valid value for the
4775
    # _capable op attributes
4776
    if self.op.readd:
4777
      old_node = self.cfg.GetNodeInfo(node)
4778
      assert old_node is not None, "Can't retrieve locked node %s" % node
4779
      for attr in self._NFLAGS:
4780
        if getattr(self.op, attr) is None:
4781
          setattr(self.op, attr, getattr(old_node, attr))
4782
    else:
4783
      for attr in self._NFLAGS:
4784
        if getattr(self.op, attr) is None:
4785
          setattr(self.op, attr, True)
4786

    
4787
    if self.op.readd and not self.op.vm_capable:
4788
      pri, sec = cfg.GetNodeInstances(node)
4789
      if pri or sec:
4790
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4791
                                   " flag set to false, but it already holds"
4792
                                   " instances" % node,
4793
                                   errors.ECODE_STATE)
4794

    
4795
    # check that the type of the node (single versus dual homed) is the
4796
    # same as for the master
4797
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4798
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4799
    newbie_singlehomed = secondary_ip == primary_ip
4800
    if master_singlehomed != newbie_singlehomed:
4801
      if master_singlehomed:
4802
        raise errors.OpPrereqError("The master has no secondary ip but the"
4803
                                   " new node has one",
4804
                                   errors.ECODE_INVAL)
4805
      else:
4806
        raise errors.OpPrereqError("The master has a secondary ip but the"
4807
                                   " new node doesn't have one",
4808
                                   errors.ECODE_INVAL)
4809

    
4810
    # checks reachability
4811
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4812
      raise errors.OpPrereqError("Node not reachable by ping",
4813
                                 errors.ECODE_ENVIRON)
4814

    
4815
    if not newbie_singlehomed:
4816
      # check reachability from my secondary ip to newbie's secondary ip
4817
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4818
                              source=myself.secondary_ip):
4819
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4820
                                   " based ping to node daemon port",
4821
                                   errors.ECODE_ENVIRON)
4822

    
4823
    if self.op.readd:
4824
      exceptions = [node]
4825
    else:
4826
      exceptions = []
4827

    
4828
    if self.op.master_capable:
4829
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4830
    else:
4831
      self.master_candidate = False
4832

    
4833
    if self.op.readd:
4834
      self.new_node = old_node
4835
    else:
4836
      node_group = cfg.LookupNodeGroup(self.op.group)
4837
      self.new_node = objects.Node(name=node,
4838
                                   primary_ip=primary_ip,
4839
                                   secondary_ip=secondary_ip,
4840
                                   master_candidate=self.master_candidate,
4841
                                   offline=False, drained=False,
4842
                                   group=node_group)
4843

    
4844
    if self.op.ndparams:
4845
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4846

    
4847
  def Exec(self, feedback_fn):
4848
    """Adds the new node to the cluster.
4849

4850
    """
4851
    new_node = self.new_node
4852
    node = new_node.name
4853

    
4854
    # We are adding a new node, so we assume it is powered
4855
    new_node.powered = True
4856

    
4857
    # for re-adds, reset the offline/drained/master-candidate flags;
4858
    # we need to reset here, otherwise offline would prevent RPC calls
4859
    # later in the procedure; this also means that if the re-add
4860
    # fails, we are left with a non-offlined, broken node
4861
    if self.op.readd:
4862
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4863
      self.LogInfo("Readding a node, the offline/drained flags were reset")
4864
      # if we demote the node, we do cleanup later in the procedure
4865
      new_node.master_candidate = self.master_candidate
4866
      if self.changed_primary_ip:
4867
        new_node.primary_ip = self.op.primary_ip
4868

    
4869
    # copy the master/vm_capable flags
4870
    for attr in self._NFLAGS:
4871
      setattr(new_node, attr, getattr(self.op, attr))
4872

    
4873
    # notify the user about any possible mc promotion
4874
    if new_node.master_candidate:
4875
      self.LogInfo("Node will be a master candidate")
4876

    
4877
    if self.op.ndparams:
4878
      new_node.ndparams = self.op.ndparams
4879
    else:
4880
      new_node.ndparams = {}
4881

    
4882
    # check connectivity
4883
    result = self.rpc.call_version([node])[node]
4884
    result.Raise("Can't get version information from node %s" % node)
4885
    if constants.PROTOCOL_VERSION == result.payload:
4886
      logging.info("Communication to node %s fine, sw version %s match",
4887
                   node, result.payload)
4888
    else:
4889
      raise errors.OpExecError("Version mismatch master version %s,"
4890
                               " node version %s" %
4891
                               (constants.PROTOCOL_VERSION, result.payload))
4892

    
4893
    # Add node to our /etc/hosts, and add key to known_hosts
4894
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4895
      master_node = self.cfg.GetMasterNode()
4896
      result = self.rpc.call_etc_hosts_modify(master_node,
4897
                                              constants.ETC_HOSTS_ADD,
4898
                                              self.hostname.name,
4899
                                              self.hostname.ip)
4900
      result.Raise("Can't update hosts file with new host data")
4901

    
4902
    if new_node.secondary_ip != new_node.primary_ip:
4903
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4904
                               False)
4905

    
4906
    node_verify_list = [self.cfg.GetMasterNode()]
4907
    node_verify_param = {
4908
      constants.NV_NODELIST: [node],
4909
      # TODO: do a node-net-test as well?
4910
    }
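    # Only the master runs this verification (ssh/hostname reachability
    # of the new node via NV_NODELIST); a full cluster verify is not
    # performed at this point.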
4911

    
4912
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4913
                                       self.cfg.GetClusterName())
4914
    for verifier in node_verify_list:
4915
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
4916
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
4917
      if nl_payload:
4918
        for failed in nl_payload:
4919
          feedback_fn("ssh/hostname verification failed"
4920
                      " (checking from %s): %s" %
4921
                      (verifier, nl_payload[failed]))
4922
        raise errors.OpExecError("ssh/hostname verification failed")
4923

    
4924
    if self.op.readd:
4925
      _RedistributeAncillaryFiles(self)
4926
      self.context.ReaddNode(new_node)
4927
      # make sure we redistribute the config
4928
      self.cfg.Update(new_node, feedback_fn)
4929
      # and make sure the new node will not have old files around
4930
      if not new_node.master_candidate:
4931
        result = self.rpc.call_node_demote_from_mc(new_node.name)
4932
        msg = result.fail_msg
4933
        if msg:
4934
          self.LogWarning("Node failed to demote itself from master"
4935
                          " candidate status: %s" % msg)
4936
    else:
4937
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
4938
                                  additional_vm=self.op.vm_capable)
4939
      self.context.AddNode(new_node, self.proc.GetECId())
4940

    
4941

    
4942
class LUNodeSetParams(LogicalUnit):
4943
  """Modifies the parameters of a node.
4944

4945
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4946
      to the node role (as _ROLE_*)
4947
  @cvar _R2F: a dictionary from node role to tuples of flags
4948
  @cvar _FLAGS: a list of attribute names corresponding to the flags
4949

4950
  """
4951
  HPATH = "node-modify"
4952
  HTYPE = constants.HTYPE_NODE
4953
  REQ_BGL = False
4954
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4955
  _F2R = {
4956
    (True, False, False): _ROLE_CANDIDATE,
4957
    (False, True, False): _ROLE_DRAINED,
4958
    (False, False, True): _ROLE_OFFLINE,
4959
    (False, False, False): _ROLE_REGULAR,
4960
    }
4961
  _R2F = dict((v, k) for k, v in _F2R.items())
4962
  _FLAGS = ["master_candidate", "drained", "offline"]
4963

    
4964
  def CheckArguments(self):
4965
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4966
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4967
                self.op.master_capable, self.op.vm_capable,
4968
                self.op.secondary_ip, self.op.ndparams]
4969
    if all_mods.count(None) == len(all_mods):
4970
      raise errors.OpPrereqError("Please pass at least one modification",
4971
                                 errors.ECODE_INVAL)
4972
    if all_mods.count(True) > 1:
4973
      raise errors.OpPrereqError("Can't set the node into more than one"
4974
                                 " state at the same time",
4975
                                 errors.ECODE_INVAL)
4976

    
4977
    # Boolean value that tells us whether we might be demoting from MC
4978
    self.might_demote = (self.op.master_candidate == False or
4979
                         self.op.offline == True or
4980
                         self.op.drained == True or
4981
                         self.op.master_capable == False)
4982

    
4983
    if self.op.secondary_ip:
4984
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4985
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4986
                                   " address" % self.op.secondary_ip,
4987
                                   errors.ECODE_INVAL)
4988

    
4989
    self.lock_all = self.op.auto_promote and self.might_demote
4990
    self.lock_instances = self.op.secondary_ip is not None
4991

    
4992
  def ExpandNames(self):
4993
    if self.lock_all:
4994
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4995
    else:
4996
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4997

    
4998
    if self.lock_instances:
4999
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5000

    
5001
  def DeclareLocks(self, level):
5002
    # If we have locked all instances, before waiting to lock nodes, release
5003
    # all the ones living on nodes unrelated to the current operation.
5004
    if level == locking.LEVEL_NODE and self.lock_instances:
5005
      self.affected_instances = []
5006
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5007
        instances_keep = []
5008

    
5009
        # Build list of instances to release
5010
        for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
5011
          instance = self.context.cfg.GetInstanceInfo(instance_name)
5012
          if (instance.disk_template in constants.DTS_INT_MIRROR and
5013
              self.op.node_name in instance.all_nodes):
5014
            instances_keep.append(instance_name)
5015
            self.affected_instances.append(instance)
5016

    
5017
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5018

    
5019
        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
5020
                set(instances_keep))
5021

    
5022
  def BuildHooksEnv(self):
5023
    """Build hooks env.
5024

5025
    This runs on the master node.
5026

5027
    """
5028
    return {
5029
      "OP_TARGET": self.op.node_name,
5030
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5031
      "OFFLINE": str(self.op.offline),
5032
      "DRAINED": str(self.op.drained),
5033
      "MASTER_CAPABLE": str(self.op.master_capable),
5034
      "VM_CAPABLE": str(self.op.vm_capable),
5035
      }
5036

    
5037
  def BuildHooksNodes(self):
5038
    """Build hooks nodes.
5039

5040
    """
5041
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5042
    return (nl, nl)
5043

    
5044
  def CheckPrereq(self):
5045
    """Check prerequisites.
5046

5047
    This only checks the instance list against the existing names.
5048

5049
    """
5050
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5051

    
5052
    if (self.op.master_candidate is not None or
5053
        self.op.drained is not None or
5054
        self.op.offline is not None):
5055
      # we can't change the master's node flags
5056
      if self.op.node_name == self.cfg.GetMasterNode():
5057
        raise errors.OpPrereqError("The master role can be changed"
5058
                                   " only via master-failover",
5059
                                   errors.ECODE_INVAL)
5060

    
5061
    if self.op.master_candidate and not node.master_capable:
5062
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5063
                                 " it a master candidate" % node.name,
5064
                                 errors.ECODE_STATE)
5065

    
5066
    if self.op.vm_capable == False:
5067
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5068
      if ipri or isec:
5069
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5070
                                   " the vm_capable flag" % node.name,
5071
                                   errors.ECODE_STATE)
5072

    
5073
    if node.master_candidate and self.might_demote and not self.lock_all:
5074
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5075
      # check if after removing the current node, we're missing master
5076
      # candidates
5077
      (mc_remaining, mc_should, _) = \
5078
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5079
      if mc_remaining < mc_should:
5080
        raise errors.OpPrereqError("Not enough master candidates, please"
5081
                                   " pass auto promote option to allow"
5082
                                   " promotion", errors.ECODE_STATE)
5083

    
5084
    self.old_flags = old_flags = (node.master_candidate,
5085
                                  node.drained, node.offline)
5086
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5087
    self.old_role = old_role = self._F2R[old_flags]
5088

    
5089
    # Check for ineffective changes
5090
    for attr in self._FLAGS:
5091
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5092
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5093
        setattr(self.op, attr, None)
5094

    
5095
    # Past this point, any flag change to False means a transition
5096
    # away from the respective state, as only real changes are kept
5097

    
5098
    # TODO: We might query the real power state if it supports OOB
5099
    if _SupportsOob(self.cfg, node):
5100
      if self.op.offline is False and not (node.powered or
5101
                                           self.op.powered == True):
5102
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5103
                                    " offline status can be reset") %
5104
                                   self.op.node_name)
5105
    elif self.op.powered is not None:
5106
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5107
                                  " as it does not support out-of-band"
5108
                                  " handling") % self.op.node_name)
5109

    
5110
    # If we're being deofflined/drained, we'll MC ourself if needed
5111
    if (self.op.drained == False or self.op.offline == False or
5112
        (self.op.master_capable and not node.master_capable)):
5113
      if _DecideSelfPromotion(self):
5114
        self.op.master_candidate = True
5115
        self.LogInfo("Auto-promoting node to master candidate")
5116

    
5117
    # If we're no longer master capable, we'll demote ourselves from MC
5118
    if self.op.master_capable == False and node.master_candidate:
5119
      self.LogInfo("Demoting from master candidate")
5120
      self.op.master_candidate = False
5121

    
5122
    # Compute new role
5123
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5124
    if self.op.master_candidate:
5125
      new_role = self._ROLE_CANDIDATE
5126
    elif self.op.drained:
5127
      new_role = self._ROLE_DRAINED
5128
    elif self.op.offline:
5129
      new_role = self._ROLE_OFFLINE
5130
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5131
      # False is still in new flags, which means we're un-setting (the
5132
      # only) True flag
5133
      new_role = self._ROLE_REGULAR
5134
    else: # no new flags, nothing, keep old role
5135
      new_role = old_role
5136

    
5137
    self.new_role = new_role
5138

    
5139
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5140
      # Trying to transition out of offline status
5141
      result = self.rpc.call_version([node.name])[node.name]
5142
      if result.fail_msg:
5143
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5144
                                   " to report its version: %s" %
5145
                                   (node.name, result.fail_msg),
5146
                                   errors.ECODE_STATE)
5147
      else:
5148
        self.LogWarning("Transitioning node from offline to online state"
5149
                        " without using re-add. Please make sure the node"
5150
                        " is healthy!")
5151

    
5152
    if self.op.secondary_ip:
5153
      # Ok even without locking, because this can't be changed by any LU
5154
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5155
      master_singlehomed = master.secondary_ip == master.primary_ip
5156
      if master_singlehomed and self.op.secondary_ip:
5157
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5158
                                   " homed cluster", errors.ECODE_INVAL)
5159

    
5160
      if node.offline:
5161
        if self.affected_instances:
5162
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5163
                                     " node has instances (%s) configured"
5164
                                     " to use it" % self.affected_instances)
5165
      else:
5166
        # On online nodes, check that no instances are running, and that
5167
        # the node has the new ip and we can reach it.
5168
        for instance in self.affected_instances:
5169
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5170

    
5171
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5172
        if master.name != node.name:
5173
          # check reachability from master secondary ip to new secondary ip
5174
          if not netutils.TcpPing(self.op.secondary_ip,
5175
                                  constants.DEFAULT_NODED_PORT,
5176
                                  source=master.secondary_ip):
5177
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5178
                                       " based ping to node daemon port",
5179
                                       errors.ECODE_ENVIRON)
5180

    
5181
    if self.op.ndparams:
5182
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5183
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5184
      self.new_ndparams = new_ndparams
5185

    
5186
  def Exec(self, feedback_fn):
5187
    """Modifies a node.
5188

5189
    """
5190
    node = self.node
5191
    old_role = self.old_role
5192
    new_role = self.new_role
5193

    
5194
    result = []
5195

    
5196
    if self.op.ndparams:
5197
      node.ndparams = self.new_ndparams
5198

    
5199
    if self.op.powered is not None:
5200
      node.powered = self.op.powered
5201

    
5202
    for attr in ["master_capable", "vm_capable"]:
5203
      val = getattr(self.op, attr)
5204
      if val is not None:
5205
        setattr(node, attr, val)
5206
        result.append((attr, str(val)))
5207

    
5208
    if new_role != old_role:
5209
      # Tell the node to demote itself, if no longer MC and not offline
5210
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5211
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5212
        if msg:
5213
          self.LogWarning("Node failed to demote itself: %s", msg)
5214

    
5215
      new_flags = self._R2F[new_role]
5216
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5217
        if of != nf:
5218
          result.append((desc, str(nf)))
5219
      (node.master_candidate, node.drained, node.offline) = new_flags
5220

    
5221
      # we locked all nodes, we adjust the CP before updating this node
5222
      if self.lock_all:
5223
        _AdjustCandidatePool(self, [node.name])
5224

    
5225
    if self.op.secondary_ip:
5226
      node.secondary_ip = self.op.secondary_ip
5227
      result.append(("secondary_ip", self.op.secondary_ip))
5228

    
5229
    # this will trigger configuration file update, if needed
5230
    self.cfg.Update(node, feedback_fn)
5231

    
5232
    # this will trigger job queue propagation or cleanup if the mc
5233
    # flag changed
5234
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5235
      self.context.ReaddNode(node)
5236

    
5237
    return result
5238

    
5239

    
5240
class LUNodePowercycle(NoHooksLU):
5241
  """Powercycles a node.
5242

5243
  """
5244
  REQ_BGL = False
5245

    
5246
  def CheckArguments(self):
5247
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5248
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5249
      raise errors.OpPrereqError("The node is the master and the force"
5250
                                 " parameter was not set",
5251
                                 errors.ECODE_INVAL)
5252

    
5253
  def ExpandNames(self):
5254
    """Locking for PowercycleNode.
5255

5256
    This is a last-resort option and shouldn't block on other
5257
    jobs. Therefore, we grab no locks.
5258

5259
    """
5260
    self.needed_locks = {}
5261

    
5262
  def Exec(self, feedback_fn):
5263
    """Reboots a node.
5264

5265
    """
5266
    result = self.rpc.call_node_powercycle(self.op.node_name,
5267
                                           self.cfg.GetHypervisorType())
5268
    result.Raise("Failed to schedule the reboot")
5269
    return result.payload
5270

    
5271

    
5272
class LUClusterQuery(NoHooksLU):
5273
  """Query cluster configuration.
5274

5275
  """
5276
  REQ_BGL = False
5277

    
5278
  def ExpandNames(self):
5279
    self.needed_locks = {}
5280

    
5281
  def Exec(self, feedback_fn):
5282
    """Return cluster config.
5283

5284
    """
5285
    cluster = self.cfg.GetClusterInfo()
5286
    os_hvp = {}
5287

    
5288
    # Filter just for enabled hypervisors
5289
    for os_name, hv_dict in cluster.os_hvp.items():
5290
      os_hvp[os_name] = {}
5291
      for hv_name, hv_params in hv_dict.items():
5292
        if hv_name in cluster.enabled_hypervisors:
5293
          os_hvp[os_name][hv_name] = hv_params
5294

    
5295
    # Convert ip_family to ip_version
5296
    primary_ip_version = constants.IP4_VERSION
5297
    if cluster.primary_ip_family == netutils.IP6Address.family:
5298
      primary_ip_version = constants.IP6_VERSION
5299

    
5300
    result = {
5301
      "software_version": constants.RELEASE_VERSION,
5302
      "protocol_version": constants.PROTOCOL_VERSION,
5303
      "config_version": constants.CONFIG_VERSION,
5304
      "os_api_version": max(constants.OS_API_VERSIONS),
5305
      "export_version": constants.EXPORT_VERSION,
5306
      "architecture": (platform.architecture()[0], platform.machine()),
5307
      "name": cluster.cluster_name,
5308
      "master": cluster.master_node,
5309
      "default_hypervisor": cluster.enabled_hypervisors[0],
5310
      "enabled_hypervisors": cluster.enabled_hypervisors,
5311
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5312
                        for hypervisor_name in cluster.enabled_hypervisors]),
5313
      "os_hvp": os_hvp,
5314
      "beparams": cluster.beparams,
5315
      "osparams": cluster.osparams,
5316
      "nicparams": cluster.nicparams,
5317
      "ndparams": cluster.ndparams,
5318
      "candidate_pool_size": cluster.candidate_pool_size,
5319
      "master_netdev": cluster.master_netdev,
5320
      "volume_group_name": cluster.volume_group_name,
5321
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5322
      "file_storage_dir": cluster.file_storage_dir,
5323
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5324
      "maintain_node_health": cluster.maintain_node_health,
5325
      "ctime": cluster.ctime,
5326
      "mtime": cluster.mtime,
5327
      "uuid": cluster.uuid,
5328
      "tags": list(cluster.GetTags()),
5329
      "uid_pool": cluster.uid_pool,
5330
      "default_iallocator": cluster.default_iallocator,
5331
      "reserved_lvs": cluster.reserved_lvs,
5332
      "primary_ip_version": primary_ip_version,
5333
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5334
      "hidden_os": cluster.hidden_os,
5335
      "blacklisted_os": cluster.blacklisted_os,
5336
      }
5337

    
5338
    return result
5339

    
5340

    
5341
class LUClusterConfigQuery(NoHooksLU):
5342
  """Return configuration values.
5343

5344
  """
5345
  REQ_BGL = False
5346
  _FIELDS_DYNAMIC = utils.FieldSet()
5347
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5348
                                  "watcher_pause", "volume_group_name")
5349

    
5350
  def CheckArguments(self):
5351
    _CheckOutputFields(static=self._FIELDS_STATIC,
5352
                       dynamic=self._FIELDS_DYNAMIC,
5353
                       selected=self.op.output_fields)
5354

    
5355
  def ExpandNames(self):
5356
    self.needed_locks = {}
5357

    
5358
  def Exec(self, feedback_fn):
5359
    """Dump a representation of the cluster config to the standard output.
5360

5361
    """
5362
    values = []
5363
    for field in self.op.output_fields:
5364
      if field == "cluster_name":
5365
        entry = self.cfg.GetClusterName()
5366
      elif field == "master_node":
5367
        entry = self.cfg.GetMasterNode()
5368
      elif field == "drain_flag":
5369
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5370
      elif field == "watcher_pause":
5371
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5372
      elif field == "volume_group_name":
5373
        entry = self.cfg.GetVGName()
5374
      else:
5375
        raise errors.ParameterError(field)
5376
      values.append(entry)
5377
    return values
5378

    
5379

    
5380
class LUInstanceActivateDisks(NoHooksLU):
5381
  """Bring up an instance's disks.
5382

5383
  """
5384
  REQ_BGL = False
5385

    
5386
  def ExpandNames(self):
5387
    self._ExpandAndLockInstance()
5388
    self.needed_locks[locking.LEVEL_NODE] = []
5389
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5390

    
5391
  def DeclareLocks(self, level):
5392
    if level == locking.LEVEL_NODE:
5393
      self._LockInstancesNodes()
5394

    
5395
  def CheckPrereq(self):
5396
    """Check prerequisites.
5397

5398
    This checks that the instance is in the cluster.
5399

5400
    """
5401
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5402
    assert self.instance is not None, \
5403
      "Cannot retrieve locked instance %s" % self.op.instance_name
5404
    _CheckNodeOnline(self, self.instance.primary_node)
5405

    
5406
  def Exec(self, feedback_fn):
5407
    """Activate the disks.
5408

5409
    """
5410
    disks_ok, disks_info = \
5411
              _AssembleInstanceDisks(self, self.instance,
5412
                                     ignore_size=self.op.ignore_size)
5413
    if not disks_ok:
5414
      raise errors.OpExecError("Cannot activate block devices")
5415

    
5416
    return disks_info
5417

    
5418

    
5419
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5420
                           ignore_size=False):
5421
  """Prepare the block devices for an instance.
5422

5423
  This sets up the block devices on all nodes.
5424

5425
  @type lu: L{LogicalUnit}
5426
  @param lu: the logical unit on whose behalf we execute
5427
  @type instance: L{objects.Instance}
5428
  @param instance: the instance for whose disks we assemble
5429
  @type disks: list of L{objects.Disk} or None
5430
  @param disks: which disks to assemble (or all, if None)
5431
  @type ignore_secondaries: boolean
5432
  @param ignore_secondaries: if true, errors on secondary nodes
5433
      won't result in an error return from the function
5434
  @type ignore_size: boolean
5435
  @param ignore_size: if true, the current known size of the disk
5436
      will not be used during the disk activation, useful for cases
5437
      when the size is wrong
5438
  @return: a tuple of (disks_ok, device_info), where device_info is a
5439
      list of (host, instance_visible_name, node_visible_name) tuples
5440
      with the mapping from node devices to instance devices
5441

5442
  """
5443
  device_info = []
5444
  disks_ok = True
5445
  iname = instance.name
5446
  disks = _ExpandCheckDisks(instance, disks)
5447

    
5448
  # With the two-pass mechanism we try to reduce the window of
5449
  # opportunity for the race condition of switching DRBD to primary
5450
  # before handshaking has occurred, but we do not eliminate it
5451

    
5452
  # The proper fix would be to wait (with some limits) until the
5453
  # connection has been made and drbd transitions from WFConnection
5454
  # into any other network-connected state (Connected, SyncTarget,
5455
  # SyncSource, etc.)
5456

    
5457
  # 1st pass, assemble on all nodes in secondary mode
5458
  for idx, inst_disk in enumerate(disks):
5459
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5460
      if ignore_size:
5461
        node_disk = node_disk.Copy()
5462
        node_disk.UnsetSize()
5463
      lu.cfg.SetDiskID(node_disk, node)
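      # First pass: is_primary=False, i.e. every node (including the
      # future primary) brings the device up in secondary mode only.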
5464
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5465
      msg = result.fail_msg
5466
      if msg:
5467
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5468
                           " (is_primary=False, pass=1): %s",
5469
                           inst_disk.iv_name, node, msg)
5470
        if not ignore_secondaries:
5471
          disks_ok = False
5472

    
5473
  # FIXME: race condition on drbd migration to primary
5474

    
5475
  # 2nd pass, do only the primary node
5476
  for idx, inst_disk in enumerate(disks):
5477
    dev_path = None
5478

    
5479
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5480
      if node != instance.primary_node:
5481
        continue
5482
      if ignore_size:
5483
        node_disk = node_disk.Copy()
5484
        node_disk.UnsetSize()
5485
      lu.cfg.SetDiskID(node_disk, node)
5486
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5487
      msg = result.fail_msg
5488
      if msg:
5489
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5490
                           " (is_primary=True, pass=2): %s",
5491
                           inst_disk.iv_name, node, msg)
5492
        disks_ok = False
5493
      else:
5494
        dev_path = result.payload
5495

    
5496
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5497

    
5498
  # leave the disks configured for the primary node
5499
  # this is a workaround that would be fixed better by
5500
  # improving the logical/physical id handling
5501
  for disk in disks:
5502
    lu.cfg.SetDiskID(disk, instance.primary_node)
5503

    
5504
  return disks_ok, device_info
5505

    
5506

    
5507
def _StartInstanceDisks(lu, instance, force):
5508
  """Start the disks of an instance.
5509

5510
  """
5511
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5512
                                           ignore_secondaries=force)
5513
  if not disks_ok:
5514
    _ShutdownInstanceDisks(lu, instance)
5515
    if force is not None and not force:
5516
      lu.proc.LogWarning("", hint="If the message above refers to a"
5517
                         " secondary node,"
5518
                         " you can retry the operation using '--force'.")
5519
    raise errors.OpExecError("Disk consistency error")
5520

    
5521

    
5522
class LUInstanceDeactivateDisks(NoHooksLU):
5523
  """Shutdown an instance's disks.
5524

5525
  """
5526
  REQ_BGL = False
5527

    
5528
  def ExpandNames(self):
5529
    self._ExpandAndLockInstance()
5530
    self.needed_locks[locking.LEVEL_NODE] = []
5531
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5532

    
5533
  def DeclareLocks(self, level):
5534
    if level == locking.LEVEL_NODE:
5535
      self._LockInstancesNodes()
5536

    
5537
  def CheckPrereq(self):
5538
    """Check prerequisites.
5539

5540
    This checks that the instance is in the cluster.
5541

5542
    """
5543
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5544
    assert self.instance is not None, \
5545
      "Cannot retrieve locked instance %s" % self.op.instance_name
5546

    
5547
  def Exec(self, feedback_fn):
5548
    """Deactivate the disks
5549

5550
    """
5551
    instance = self.instance
5552
    if self.op.force:
5553
      _ShutdownInstanceDisks(self, instance)
5554
    else:
5555
      _SafeShutdownInstanceDisks(self, instance)
5556

    
5557

    
5558
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5559
  """Shutdown block devices of an instance.
5560

5561
  This function checks if an instance is running, before calling
5562
  _ShutdownInstanceDisks.
5563

5564
  """
5565
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5566
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5567

    
5568

    
5569
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


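# Editor's illustration (not part of Ganeti): _ExpandCheckDisks above and
# _ShutdownInstanceDisks below share a "None means all disks" convention for
# their C{disks} argument.  The stand-alone sketch below restates that
# selection rule with plain Python values; the _example_* name is invented
# and nothing in this module calls it.
def _example_select_disks(all_disks, selected=None):
  """Return either all disks or a validated subset (illustration only)."""
  if selected is None:
    return all_disks
  if not set(selected).issubset(all_disks):
    raise ValueError("Can only act on disks belonging to the target instance")
  return selected

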
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored; errors on offline secondary nodes always are.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


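# Editor's illustration (not part of Ganeti): the function above only treats
# a failed blockdev shutdown as fatal when it happens on the primary node
# (and ignore_primary is false) or on an online secondary node.  The sketch
# below restates that decision in isolation; _example_shutdown_failure_is_fatal
# is an invented name used purely for illustration.
def _example_shutdown_failure_is_fatal(is_primary, ignore_primary,
                                       node_offline):
  """Mirror of the error-accounting rule above (illustration only)."""
  if is_primary:
    return not ignore_primary
  return not node_offline

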
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


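# Editor's illustration (not part of Ganeti): req_sizes above maps a volume
# group name to the total space, in MiB, that must be free in that VG on
# every node.  The sketch below shows one way such a dict could be built
# from (vg, size) pairs; the function name and the VG name in the docstring
# are invented for the illustration and are not used elsewhere.
def _example_sum_disk_requests(disk_requests):
  """Aggregate (vg, size_in_mib) pairs into a req_sizes-style dict.

  For instance, [("xenvg", 1024), ("xenvg", 512)] becomes {"xenvg": 1536}.

  """
  req_sizes = {}
  for vg, size in disk_requests:
    req_sizes[vg] = req_sizes.get(vg, 0) + size
  return req_sizes

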
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams,
                                            self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


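# Editor's illustration (not part of Ganeti): LUInstanceReboot.Exec above
# distinguishes two paths: soft/hard reboots of a running instance go through
# the hypervisor's reboot call, while everything else (full reboots, or a
# stopped instance) becomes an explicit stop-and-start.  The sketch below
# restates that decision with plain strings; the _example_* name and the
# string tokens are invented for the illustration.
def _example_reboot_strategy(instance_running, reboot_type):
  """Return "hypervisor-reboot" or "stop-and-start" (illustration only)."""
  if instance_running and reboot_type in ("soft", "hard"):
    return "hypervisor-reboot"
  return "stop-and-start"

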
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # normalise the disk list
    self.op.disks = sorted(frozenset(self.op.disks))

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    to_skip = []
    mods = [] # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes and minors
          assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6 # otherwise disk internals
                                           # have changed
          (_, _, old_port, _, _, old_secret) = disk.logical_id
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                    new_minors[0], new_minors[1], old_secret)
          assert len(disk.logical_id) == len(new_id)
          mods.append((idx, new_id))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id in mods:
      instance.disks[idx].logical_id = new_id

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (hostname.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


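# Editor's illustration (not part of Ganeti): LUInstanceMove.Exec above copies
# each disk by assembling it on the target node and then exporting the data
# from the source node, collecting failure messages and aborting at the first
# error.  The stand-alone sketch below mirrors that control flow with plain
# callables instead of RPC results; all names are invented for illustration.
def _example_copy_disks(disks, assemble_fn, export_fn):
  """Copy disks one by one, stopping at the first failure (illustration)."""
  failures = []
  for idx, disk in enumerate(disks):
    err = assemble_fn(disk)          # returns an error message or None
    if err is None:
      err = export_fn(disk)          # ditto
    if err is not None:
      failures.append("disk %d: %s" % (idx, err))
      break
  return failures

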
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare jobs for migrating instances
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
      ]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)


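# Editor's illustration (not part of Ganeti): TLMigrateInstance.CheckPrereq
# below folds the opcode's 'live' flag and 'mode' parameter into a single
# migration mode: an explicit 'live' boolean wins, an explicit mode is used
# as-is, and otherwise the hypervisor default applies (failovers are never
# live).  The sketch below restates that resolution with plain strings; the
# function name and the "live"/"non-live" tokens are invented placeholders.
def _example_resolve_migration_mode(live, mode, hypervisor_default,
                                    failover=False):
  """Resolve the effective migration mode (illustration only)."""
  if failover:
    return "non-live"
  if live is not None and mode is not None:
    raise ValueError("Only one of 'live' and 'mode' may be given")
  if live is not None:
    if live:
      return "live"
    return "non-live"
  if mode is not None:
    return mode
  return hypervisor_default

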
class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we cleanup from a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover timeout of the shutdown

  """
  def __init__(self, lu, instance_name, cleanup=False,
               failover=False, fallback=False,
               ignore_consistency=False,
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance

    if (not self.cleanup and not instance.admin_up and not self.failover and
        self.fallback):
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
                      " to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node))

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])

    else:
      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    # check memory requirements on the secondary node
    if not self.failover or instance.admin_up:
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                           instance.name, i_be[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
                                                skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False

  def _RunAllocator(self):
7005
    """Run the allocator based on input opcode.
7006

7007
    """
7008
    ial = IAllocator(self.cfg, self.rpc,
7009
                     mode=constants.IALLOCATOR_MODE_RELOC,
7010
                     name=self.instance_name,
7011
                     # TODO See why hail breaks with a single node below
7012
                     relocate_from=[self.instance.primary_node,
7013
                                    self.instance.primary_node],
7014
                     )
7015

    
7016
    ial.Run(self.lu.op.iallocator)
7017

    
7018
    if not ial.success:
7019
      raise errors.OpPrereqError("Can't compute nodes using"
7020
                                 " iallocator '%s': %s" %
7021
                                 (self.lu.op.iallocator, ial.info),
7022
                                 errors.ECODE_NORES)
7023
    if len(ial.result) != ial.required_nodes:
7024
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7025
                                 " of nodes (%s), required %s" %
7026
                                 (self.lu.op.iallocator, len(ial.result),
7027
                                  ial.required_nodes), errors.ECODE_FAULT)
7028
    self.target_node = ial.result[0]
7029
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7030
                    self.instance_name, self.lu.op.iallocator,
7031
                    utils.CommaJoin(ial.result))
7032

    
7033
  def _WaitUntilSync(self):
7034
    """Poll with custom rpc for disk sync.
7035

7036
    This uses our own step-based rpc call.
7037

7038
    """
7039
    self.feedback_fn("* wait until resync is done")
7040
    all_done = False
7041
    while not all_done:
7042
      all_done = True
7043
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7044
                                            self.nodes_ip,
7045
                                            self.instance.disks)
7046
      min_percent = 100
7047
      for node, nres in result.items():
7048
        nres.Raise("Cannot resync disks on node %s" % node)
7049
        node_done, node_percent = nres.payload
7050
        all_done = all_done and node_done
7051
        if node_percent is not None:
7052
          min_percent = min(min_percent, node_percent)
7053
      if not all_done:
7054
        if min_percent < 100:
7055
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7056
        time.sleep(2)
7057

    
7058
  def _EnsureSecondary(self, node):
7059
    """Demote a node to secondary.
7060

7061
    """
7062
    self.feedback_fn("* switching node %s to secondary mode" % node)
7063

    
7064
    for dev in self.instance.disks:
7065
      self.cfg.SetDiskID(dev, node)
7066

    
7067
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7068
                                          self.instance.disks)
7069
    result.Raise("Cannot change disk to secondary on node %s" % node)
7070

    
7071
  def _GoStandalone(self):
7072
    """Disconnect from the network.
7073

7074
    """
7075
    self.feedback_fn("* changing into standalone mode")
7076
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7077
                                               self.instance.disks)
7078
    for node, nres in result.items():
7079
      nres.Raise("Cannot disconnect disks node %s" % node)
7080

    
7081
  def _GoReconnect(self, multimaster):
7082
    """Reconnect to the network.
7083

7084
    """
7085
    if multimaster:
7086
      msg = "dual-master"
7087
    else:
7088
      msg = "single-master"
7089
    self.feedback_fn("* changing disks into %s mode" % msg)
7090
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7091
                                           self.instance.disks,
7092
                                           self.instance.name, multimaster)
7093
    for node, nres in result.items():
7094
      nres.Raise("Cannot change disks config on node %s" % node)
7095

    
7096
  def _ExecCleanup(self):
7097
    """Try to cleanup after a failed migration.
7098

7099
    The cleanup is done by:
7100
      - check that the instance is running only on one node
7101
        (and update the config if needed)
7102
      - change disks on its secondary node to secondary
7103
      - wait until disks are fully synchronized
7104
      - disconnect from the network
7105
      - change disks into single-master mode
7106
      - wait again until disks are fully synchronized
7107

7108
    """
7109
    instance = self.instance
7110
    target_node = self.target_node
7111
    source_node = self.source_node
7112

    
7113
    # check running on only one node
7114
    self.feedback_fn("* checking where the instance actually runs"
7115
                     " (if this hangs, the hypervisor might be in"
7116
                     " a bad state)")
7117
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7118
    for node, result in ins_l.items():
7119
      result.Raise("Can't contact node %s" % node)
7120

    
7121
    runningon_source = instance.name in ins_l[source_node].payload
7122
    runningon_target = instance.name in ins_l[target_node].payload
7123

    
7124
    if runningon_source and runningon_target:
7125
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7126
                               " or the hypervisor is confused; you will have"
7127
                               " to ensure manually that it runs only on one"
7128
                               " and restart this operation")
7129

    
7130
    if not (runningon_source or runningon_target):
7131
      raise errors.OpExecError("Instance does not seem to be running at all;"
7132
                               " in this case it's safer to repair by"
7133
                               " running 'gnt-instance stop' to ensure disk"
7134
                               " shutdown, and then restarting it")
7135

    
7136
    if runningon_target:
7137
      # the migration has actually succeeded, we need to update the config
7138
      self.feedback_fn("* instance running on secondary node (%s),"
7139
                       " updating config" % target_node)
7140
      instance.primary_node = target_node
7141
      self.cfg.Update(instance, self.feedback_fn)
7142
      demoted_node = source_node
7143
    else:
7144
      self.feedback_fn("* instance confirmed to be running on its"
7145
                       " primary node (%s)" % source_node)
7146
      demoted_node = target_node
7147

    
7148
    if instance.disk_template in constants.DTS_INT_MIRROR:
7149
      self._EnsureSecondary(demoted_node)
7150
      try:
7151
        self._WaitUntilSync()
7152
      except errors.OpExecError:
7153
        # we ignore errors here, since if the device is standalone, it
7154
        # won't be able to sync
7155
        pass
7156
      self._GoStandalone()
7157
      self._GoReconnect(False)
7158
      self._WaitUntilSync()
7159

    
7160
    self.feedback_fn("* done")
7161

    
7162
  def _RevertDiskStatus(self):
7163
    """Try to revert the disk status after a failed migration.
7164

7165
    """
7166
    target_node = self.target_node
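    # externally mirrored disk templates keep no DRBD peer state, so there is
    # nothing to revert for them and we return early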
7167
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7168
      return
7169

    
7170
    try:
7171
      self._EnsureSecondary(target_node)
7172
      self._GoStandalone()
7173
      self._GoReconnect(False)
7174
      self._WaitUntilSync()
7175
    except errors.OpExecError, err:
7176
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7177
                         " please try to recover the instance manually;"
7178
                         " error '%s'" % str(err))
7179

    
7180
  def _AbortMigration(self):
7181
    """Call the hypervisor code to abort a started migration.
7182

7183
    """
7184
    instance = self.instance
7185
    target_node = self.target_node
7186
    migration_info = self.migration_info
7187

    
7188
    abort_result = self.rpc.call_finalize_migration(target_node,
7189
                                                    instance,
7190
                                                    migration_info,
7191
                                                    False)
7192
    abort_msg = abort_result.fail_msg
7193
    if abort_msg:
7194
      logging.error("Aborting migration failed on target node %s: %s",
7195
                    target_node, abort_msg)
7196
      # Don't raise an exception here, as we still have to try to revert the
7197
      # disk status, even if this step failed.
7198

    
7199
  def _ExecMigration(self):
7200
    """Migrate an instance.
7201

7202
    The migrate is done by:
7203
      - change the disks into dual-master mode
7204
      - wait until disks are fully synchronized again
7205
      - migrate the instance
7206
      - change disks on the new secondary node (the old primary) to secondary
7207
      - wait until disks are fully synchronized
7208
      - change disks into single-master mode
7209

7210
    """
7211
    instance = self.instance
7212
    target_node = self.target_node
7213
    source_node = self.source_node
7214

    
7215
    self.feedback_fn("* checking disk consistency between source and target")
7216
    for dev in instance.disks:
7217
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7218
        raise errors.OpExecError("Disk %s is degraded or not fully"
7219
                                 " synchronized on target node,"
7220
                                 " aborting migration" % dev.iv_name)
7221

    
7222
    # First get the migration information from the remote node
7223
    result = self.rpc.call_migration_info(source_node, instance)
7224
    msg = result.fail_msg
7225
    if msg:
7226
      log_err = ("Failed fetching source migration information from %s: %s" %
7227
                 (source_node, msg))
7228
      logging.error(log_err)
7229
      raise errors.OpExecError(log_err)
7230

    
7231
    self.migration_info = migration_info = result.payload
7232

    
7233
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7234
      # Then switch the disks to master/master mode
7235
      self._EnsureSecondary(target_node)
7236
      self._GoStandalone()
7237
      self._GoReconnect(True)
7238
      self._WaitUntilSync()
7239

    
7240
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7241
    result = self.rpc.call_accept_instance(target_node,
7242
                                           instance,
7243
                                           migration_info,
7244
                                           self.nodes_ip[target_node])
7245

    
7246
    msg = result.fail_msg
7247
    if msg:
7248
      logging.error("Instance pre-migration failed, trying to revert"
7249
                    " disk status: %s", msg)
7250
      self.feedback_fn("Pre-migration failed, aborting")
7251
      self._AbortMigration()
7252
      self._RevertDiskStatus()
7253
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7254
                               (instance.name, msg))
7255

    
7256
    self.feedback_fn("* migrating instance to %s" % target_node)
7257
    result = self.rpc.call_instance_migrate(source_node, instance,
7258
                                            self.nodes_ip[target_node],
7259
                                            self.live)
7260
    msg = result.fail_msg
7261
    if msg:
7262
      logging.error("Instance migration failed, trying to revert"
7263
                    " disk status: %s", msg)
7264
      self.feedback_fn("Migration failed, aborting")
7265
      self._AbortMigration()
7266
      self._RevertDiskStatus()
7267
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7268
                               (instance.name, msg))
7269

    
7270
    instance.primary_node = target_node
7271
    # distribute new instance config to the other nodes
7272
    self.cfg.Update(instance, self.feedback_fn)
7273

    
7274
    result = self.rpc.call_finalize_migration(target_node,
7275
                                              instance,
7276
                                              migration_info,
7277
                                              True)
7278
    msg = result.fail_msg
7279
    if msg:
7280
      logging.error("Instance migration succeeded, but finalization failed:"
7281
                    " %s", msg)
7282
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7283
                               msg)
7284

    
7285
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7286
      self._EnsureSecondary(source_node)
7287
      self._WaitUntilSync()
7288
      self._GoStandalone()
7289
      self._GoReconnect(False)
7290
      self._WaitUntilSync()
7291

    
7292
    self.feedback_fn("* done")
7293

    
7294
  def _ExecFailover(self):
7295
    """Failover an instance.
7296

7297
    The failover is done by shutting it down on its present node and
7298
    starting it on the secondary.
7299

7300
    """
7301
    instance = self.instance
7302
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7303

    
7304
    source_node = instance.primary_node
7305
    target_node = self.target_node
7306

    
7307
    if instance.admin_up:
7308
      self.feedback_fn("* checking disk consistency between source and target")
7309
      for dev in instance.disks:
7310
        # for drbd, these are drbd over lvm
7311
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7312
          if primary_node.offline:
7313
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7314
                             " target node %s" %
7315
                             (primary_node.name, dev.iv_name, target_node))
7316
          elif not self.ignore_consistency:
7317
            raise errors.OpExecError("Disk %s is degraded on target node,"
7318
                                     " aborting failover" % dev.iv_name)
7319
    else:
7320
      self.feedback_fn("* not checking disk consistency as instance is not"
7321
                       " running")
7322

    
7323
    self.feedback_fn("* shutting down instance on source node")
7324
    logging.info("Shutting down instance %s on node %s",
7325
                 instance.name, source_node)
7326

    
7327
    result = self.rpc.call_instance_shutdown(source_node, instance,
7328
                                             self.shutdown_timeout)
7329
    msg = result.fail_msg
7330
    if msg:
7331
      if self.ignore_consistency or primary_node.offline:
7332
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7333
                           " proceeding anyway; please make sure node"
7334
                           " %s is down; error details: %s",
7335
                           instance.name, source_node, source_node, msg)
7336
      else:
7337
        raise errors.OpExecError("Could not shutdown instance %s on"
7338
                                 " node %s: %s" %
7339
                                 (instance.name, source_node, msg))
7340

    
7341
    self.feedback_fn("* deactivating the instance's disks on source node")
7342
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7343
      raise errors.OpExecError("Can't shut down the instance's disks")
7344

    
7345
    instance.primary_node = target_node
7346
    # distribute new instance config to the other nodes
7347
    self.cfg.Update(instance, self.feedback_fn)
7348

    
7349
    # Only start the instance if it's marked as up
7350
    if instance.admin_up:
7351
      self.feedback_fn("* activating the instance's disks on target node %s" %
7352
                       target_node)
7353
      logging.info("Starting instance %s on node %s",
7354
                   instance.name, target_node)
7355

    
7356
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7357
                                           ignore_secondaries=True)
7358
      if not disks_ok:
7359
        _ShutdownInstanceDisks(self.lu, instance)
7360
        raise errors.OpExecError("Can't activate the instance's disks")
7361

    
7362
      self.feedback_fn("* starting the instance on the target node %s" %
7363
                       target_node)
7364
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7365
                                            False)
7366
      msg = result.fail_msg
7367
      if msg:
7368
        _ShutdownInstanceDisks(self.lu, instance)
7369
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7370
                                 (instance.name, target_node, msg))
7371

    
7372
  def Exec(self, feedback_fn):
7373
    """Perform the migration.
7374

7375
    """
7376
    self.feedback_fn = feedback_fn
7377
    self.source_node = self.instance.primary_node
7378

    
7379
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7380
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7381
      self.target_node = self.instance.secondary_nodes[0]
7382
      # Otherwise self.target_node has been populated either
7383
      # directly, or through an iallocator.
7384

    
7385
    self.all_nodes = [self.source_node, self.target_node]
7386
    self.nodes_ip = {
7387
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7388
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7389
      }
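    # secondary IPs are used here because both the DRBD traffic and the
    # migration stream go over the replication network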
7390

    
7391
    if self.failover:
7392
      feedback_fn("Failover instance %s" % self.instance.name)
7393
      self._ExecFailover()
7394
    else:
7395
      feedback_fn("Migrating instance %s" % self.instance.name)
7396

    
7397
      if self.cleanup:
7398
        return self._ExecCleanup()
7399
      else:
7400
        return self._ExecMigration()
7401

    
7402

    
7403
def _CreateBlockDev(lu, node, instance, device, force_create,
7404
                    info, force_open):
7405
  """Create a tree of block devices on a given node.
7406

7407
  If this device type has to be created on secondaries, create it and
7408
  all its children.
7409

7410
  If not, just recurse to children keeping the same 'force' value.
7411

7412
  @param lu: the lu on whose behalf we execute
7413
  @param node: the node on which to create the device
7414
  @type instance: L{objects.Instance}
7415
  @param instance: the instance which owns the device
7416
  @type device: L{objects.Disk}
7417
  @param device: the device to create
7418
  @type force_create: boolean
7419
  @param force_create: whether to force creation of this device; this
7420
      will be changed to True whenever we find a device which has
7421
      CreateOnSecondary() attribute
7422
  @param info: the extra 'metadata' we should attach to the device
7423
      (this will be represented as a LVM tag)
7424
  @type force_open: boolean
7425
  @param force_open: this parameter will be passed to the
7426
      L{backend.BlockdevCreate} function where it specifies
7427
      whether we run on primary or not, and it affects both
7428
      the child assembly and the device's own Open() execution
7429

7430
  """
7431
  if device.CreateOnSecondary():
7432
    force_create = True
7433

    
7434
  if device.children:
7435
    for child in device.children:
7436
      _CreateBlockDev(lu, node, instance, child, force_create,
7437
                      info, force_open)
7438

    
7439
  if not force_create:
7440
    return
7441

    
7442
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7443

    
7444

    
7445
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7446
  """Create a single block device on a given node.
7447

7448
  This will not recurse over children of the device, so they must be
7449
  created in advance.
7450

7451
  @param lu: the lu on whose behalf we execute
7452
  @param node: the node on which to create the device
7453
  @type instance: L{objects.Instance}
7454
  @param instance: the instance which owns the device
7455
  @type device: L{objects.Disk}
7456
  @param device: the device to create
7457
  @param info: the extra 'metadata' we should attach to the device
7458
      (this will be represented as a LVM tag)
7459
  @type force_open: boolean
7460
  @param force_open: this parameter will be passed to the
7461
      L{backend.BlockdevCreate} function where it specifies
7462
      whether we run on primary or not, and it affects both
7463
      the child assembly and the device's own Open() execution
7464

7465
  """
7466
  lu.cfg.SetDiskID(device, node)
7467
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7468
                                       instance.name, force_open, info)
7469
  result.Raise("Can't create block device %s on"
7470
               " node %s for instance %s" % (device, node, instance.name))
7471
  if device.physical_id is None:
7472
    device.physical_id = result.payload
7473

    
7474

    
7475
def _GenerateUniqueNames(lu, exts):
7476
  """Generate a suitable LV name.
7477

7478
  This will generate a logical volume name for the given instance.
7479

7480
  """
7481
  results = []
7482
  for val in exts:
7483
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7484
    results.append("%s%s" % (new_id, val))
7485
  return results
7486

    
7487

    
7488
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7489
                         iv_name, p_minor, s_minor):
7490
  """Generate a drbd8 device complete with its children.
7491

7492
  """
7493
  assert len(vgnames) == len(names) == 2
7494
  port = lu.cfg.AllocatePort()
7495
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7496
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7497
                          logical_id=(vgnames[0], names[0]))
7498
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7499
                          logical_id=(vgnames[1], names[1]))
7500
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7501
                          logical_id=(primary, secondary, port,
7502
                                      p_minor, s_minor,
7503
                                      shared_secret),
7504
                          children=[dev_data, dev_meta],
7505
                          iv_name=iv_name)
7506
  return drbd_dev
7507

    
7508

    
7509
def _GenerateDiskTemplate(lu, template_name,
7510
                          instance_name, primary_node,
7511
                          secondary_nodes, disk_info,
7512
                          file_storage_dir, file_driver,
7513
                          base_index, feedback_fn):
7514
  """Generate the entire disk layout for a given template type.
7515

7516
  """
7517
  #TODO: compute space requirements
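  # Each branch below maps the abstract disk_info dicts to concrete
  # objects.Disk trees: plain LVs, DRBD8-over-LV pairs, file-backed devices
  # (regular or shared) or adopted pass-through block devices.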
7518

    
7519
  vgname = lu.cfg.GetVGName()
7520
  disk_count = len(disk_info)
7521
  disks = []
7522
  if template_name == constants.DT_DISKLESS:
7523
    pass
7524
  elif template_name == constants.DT_PLAIN:
7525
    if len(secondary_nodes) != 0:
7526
      raise errors.ProgrammerError("Wrong template configuration")
7527

    
7528
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7529
                                      for i in range(disk_count)])
7530
    for idx, disk in enumerate(disk_info):
7531
      disk_index = idx + base_index
7532
      vg = disk.get(constants.IDISK_VG, vgname)
7533
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7534
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7535
                              size=disk[constants.IDISK_SIZE],
7536
                              logical_id=(vg, names[idx]),
7537
                              iv_name="disk/%d" % disk_index,
7538
                              mode=disk[constants.IDISK_MODE])
7539
      disks.append(disk_dev)
7540
  elif template_name == constants.DT_DRBD8:
7541
    if len(secondary_nodes) != 1:
7542
      raise errors.ProgrammerError("Wrong template configuration")
7543
    remote_node = secondary_nodes[0]
7544
    minors = lu.cfg.AllocateDRBDMinor(
7545
      [primary_node, remote_node] * len(disk_info), instance_name)
7546

    
7547
    names = []
7548
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7549
                                               for i in range(disk_count)]):
7550
      names.append(lv_prefix + "_data")
7551
      names.append(lv_prefix + "_meta")
7552
    for idx, disk in enumerate(disk_info):
7553
      disk_index = idx + base_index
7554
      data_vg = disk.get(constants.IDISK_VG, vgname)
7555
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7556
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7557
                                      disk[constants.IDISK_SIZE],
7558
                                      [data_vg, meta_vg],
7559
                                      names[idx * 2:idx * 2 + 2],
7560
                                      "disk/%d" % disk_index,
7561
                                      minors[idx * 2], minors[idx * 2 + 1])
7562
      disk_dev.mode = disk[constants.IDISK_MODE]
7563
      disks.append(disk_dev)
7564
  elif template_name == constants.DT_FILE:
7565
    if len(secondary_nodes) != 0:
7566
      raise errors.ProgrammerError("Wrong template configuration")
7567

    
7568
    opcodes.RequireFileStorage()
7569

    
7570
    for idx, disk in enumerate(disk_info):
7571
      disk_index = idx + base_index
7572
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7573
                              size=disk[constants.IDISK_SIZE],
7574
                              iv_name="disk/%d" % disk_index,
7575
                              logical_id=(file_driver,
7576
                                          "%s/disk%d" % (file_storage_dir,
7577
                                                         disk_index)),
7578
                              mode=disk[constants.IDISK_MODE])
7579
      disks.append(disk_dev)
7580
  elif template_name == constants.DT_SHARED_FILE:
7581
    if len(secondary_nodes) != 0:
7582
      raise errors.ProgrammerError("Wrong template configuration")
7583

    
7584
    opcodes.RequireSharedFileStorage()
7585

    
7586
    for idx, disk in enumerate(disk_info):
7587
      disk_index = idx + base_index
7588
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7589
                              size=disk[constants.IDISK_SIZE],
7590
                              iv_name="disk/%d" % disk_index,
7591
                              logical_id=(file_driver,
7592
                                          "%s/disk%d" % (file_storage_dir,
7593
                                                         disk_index)),
7594
                              mode=disk[constants.IDISK_MODE])
7595
      disks.append(disk_dev)
7596
  elif template_name == constants.DT_BLOCK:
7597
    if len(secondary_nodes) != 0:
7598
      raise errors.ProgrammerError("Wrong template configuration")
7599

    
7600
    for idx, disk in enumerate(disk_info):
7601
      disk_index = idx + base_index
7602
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7603
                              size=disk[constants.IDISK_SIZE],
7604
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7605
                                          disk[constants.IDISK_ADOPT]),
7606
                              iv_name="disk/%d" % disk_index,
7607
                              mode=disk[constants.IDISK_MODE])
7608
      disks.append(disk_dev)
7609

    
7610
  else:
7611
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7612
  return disks
7613

    
7614

    
7615
def _GetInstanceInfoText(instance):
7616
  """Compute that text that should be added to the disk's metadata.
7617

7618
  """
7619
  return "originstname+%s" % instance.name
7620

    
7621

    
7622
def _CalcEta(time_taken, written, total_size):
7623
  """Calculates the ETA based on size written and total size.
7624

7625
  @param time_taken: The time taken so far
7626
  @param written: amount written so far
7627
  @param total_size: The total size of data to be written
7628
  @return: The remaining time in seconds
7629

7630
  """
7631
  avg_time = time_taken / float(written)
7632
  return (total_size - written) * avg_time
7633

    
7634

    
7635
def _WipeDisks(lu, instance):
7636
  """Wipes instance disks.
7637

7638
  @type lu: L{LogicalUnit}
7639
  @param lu: the logical unit on whose behalf we execute
7640
  @type instance: L{objects.Instance}
7641
  @param instance: the instance whose disks we should create
7642
  @return: the success of the wipe
7643

7644
  """
7645
  node = instance.primary_node
7646

    
7647
  for device in instance.disks:
7648
    lu.cfg.SetDiskID(device, node)
7649

    
7650
  logging.info("Pause sync of instance %s disks", instance.name)
7651
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7652

    
7653
  for idx, success in enumerate(result.payload):
7654
    if not success:
7655
      logging.warn("pause-sync of instance %s for disks %d failed",
7656
                   instance.name, idx)
7657

    
7658
  try:
7659
    for idx, device in enumerate(instance.disks):
7660
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7661
      # MAX_WIPE_CHUNK at max
7662
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7663
                            constants.MIN_WIPE_CHUNK_PERCENT)
7664
      # we _must_ make this an int, otherwise rounding errors will
7665
      # occur
7666
      wipe_chunk_size = int(wipe_chunk_size)
7667

    
7668
      lu.LogInfo("* Wiping disk %d", idx)
7669
      logging.info("Wiping disk %d for instance %s, node %s using"
7670
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7671

    
7672
      offset = 0
7673
      size = device.size
7674
      last_output = 0
7675
      start_time = time.time()
7676

    
7677
      while offset < size:
7678
        wipe_size = min(wipe_chunk_size, size - offset)
7679
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7680
                      idx, offset, wipe_size)
7681
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7682
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7683
                     (idx, offset, wipe_size))
7684
        now = time.time()
7685
        offset += wipe_size
7686
        if now - last_output >= 60:
7687
          eta = _CalcEta(now - start_time, offset, size)
7688
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7689
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7690
          last_output = now
7691
  finally:
7692
    logging.info("Resume sync of instance %s disks", instance.name)
7693

    
7694
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7695

    
7696
    for idx, success in enumerate(result.payload):
7697
      if not success:
7698
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7699
                      " look at the status and troubleshoot the issue", idx)
7700
        logging.warn("resume-sync of instance %s for disks %d failed",
7701
                     instance.name, idx)
7702

    
7703

    
7704
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7705
  """Create all disks for an instance.
7706

7707
  This abstracts away some work from AddInstance.
7708

7709
  @type lu: L{LogicalUnit}
7710
  @param lu: the logical unit on whose behalf we execute
7711
  @type instance: L{objects.Instance}
7712
  @param instance: the instance whose disks we should create
7713
  @type to_skip: list
7714
  @param to_skip: list of indices to skip
7715
  @type target_node: string
7716
  @param target_node: if passed, overrides the target node for creation
7717
7719

7720
  """
7721
  info = _GetInstanceInfoText(instance)
7722
  if target_node is None:
7723
    pnode = instance.primary_node
7724
    all_nodes = instance.all_nodes
7725
  else:
7726
    pnode = target_node
7727
    all_nodes = [pnode]
7728

    
7729
  if instance.disk_template in constants.DTS_FILEBASED:
7730
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7731
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7732

    
7733
    result.Raise("Failed to create directory '%s' on"
7734
                 " node %s" % (file_storage_dir, pnode))
7735

    
7736
  # Note: this needs to be kept in sync with adding of disks in
7737
  # LUInstanceSetParams
7738
  for idx, device in enumerate(instance.disks):
7739
    if to_skip and idx in to_skip:
7740
      continue
7741
    logging.info("Creating volume %s for instance %s",
7742
                 device.iv_name, instance.name)
7743
    #HARDCODE
7744
    for node in all_nodes:
7745
      f_create = node == pnode
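      # only the primary node forces creation (and opening) of the whole
      # device tree; other nodes create only what CreateOnSecondary() demands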
7746
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7747

    
7748

    
7749
def _RemoveDisks(lu, instance, target_node=None):
7750
  """Remove all disks for an instance.
7751

7752
  This abstracts away some work from `AddInstance()` and
7753
  `RemoveInstance()`. Note that in case some of the devices couldn't
7754
  be removed, the removal will continue with the other ones (compare
7755
  with `_CreateDisks()`).
7756

7757
  @type lu: L{LogicalUnit}
7758
  @param lu: the logical unit on whose behalf we execute
7759
  @type instance: L{objects.Instance}
7760
  @param instance: the instance whose disks we should remove
7761
  @type target_node: string
7762
  @param target_node: used to override the node on which to remove the disks
7763
  @rtype: boolean
7764
  @return: the success of the removal
7765

7766
  """
7767
  logging.info("Removing block devices for instance %s", instance.name)
7768

    
7769
  all_result = True
7770
  for device in instance.disks:
7771
    if target_node:
7772
      edata = [(target_node, device)]
7773
    else:
7774
      edata = device.ComputeNodeTree(instance.primary_node)
7775
    for node, disk in edata:
7776
      lu.cfg.SetDiskID(disk, node)
7777
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7778
      if msg:
7779
        lu.LogWarning("Could not remove block device %s on node %s,"
7780
                      " continuing anyway: %s", device.iv_name, node, msg)
7781
        all_result = False
7782

    
7783
  if instance.disk_template == constants.DT_FILE:
7784
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7785
    if target_node:
7786
      tgt = target_node
7787
    else:
7788
      tgt = instance.primary_node
7789
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7790
    if result.fail_msg:
7791
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7792
                    file_storage_dir, instance.primary_node, result.fail_msg)
7793
      all_result = False
7794

    
7795
  return all_result
7796

    
7797

    
7798
def _ComputeDiskSizePerVG(disk_template, disks):
7799
  """Compute disk size requirements in the volume group
7800

7801
  """
7802
  def _compute(disks, payload):
7803
    """Universal algorithm.
7804

7805
    """
7806
    vgs = {}
7807
    for disk in disks:
      vg = disk[constants.IDISK_VG]
      vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + payload
7810

    
7811
    return vgs
7812

    
7813
  # Required free disk space as a function of disk and swap space
7814
  req_size_dict = {
7815
    constants.DT_DISKLESS: {},
7816
    constants.DT_PLAIN: _compute(disks, 0),
7817
    # 128 MB are added for drbd metadata for each disk
7818
    constants.DT_DRBD8: _compute(disks, 128),
7819
    constants.DT_FILE: {},
7820
    constants.DT_SHARED_FILE: {},
7821
  }
7822

    
7823
  if disk_template not in req_size_dict:
7824
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7825
                                 " is unknown" %  disk_template)
7826

    
7827
  return req_size_dict[disk_template]
7828

    
7829

    
7830
def _ComputeDiskSize(disk_template, disks):
7831
  """Compute disk size requirements in the volume group
7832

7833
  """
7834
  # Required free disk space as a function of disk and swap space
7835
  req_size_dict = {
7836
    constants.DT_DISKLESS: None,
7837
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7838
    # 128 MB are added for drbd metadata for each disk
7839
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7840
    constants.DT_FILE: None,
7841
    constants.DT_SHARED_FILE: 0,
7842
    constants.DT_BLOCK: 0,
7843
  }
7844

    
7845
  if disk_template not in req_size_dict:
7846
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7847
                                 " is unknown" %  disk_template)
7848

    
7849
  return req_size_dict[disk_template]
7850

    
7851

    
7852
def _FilterVmNodes(lu, nodenames):
7853
  """Filters out non-vm_capable nodes from a list.
7854

7855
  @type lu: L{LogicalUnit}
7856
  @param lu: the logical unit for which we check
7857
  @type nodenames: list
7858
  @param nodenames: the list of nodes on which we should check
7859
  @rtype: list
7860
  @return: the list of vm-capable nodes
7861

7862
  """
7863
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7864
  return [name for name in nodenames if name not in vm_nodes]
7865

    
7866

    
7867
def _CheckHVParams(lu, nodenames, hvname, hvparams):
7868
  """Hypervisor parameter validation.
7869

7870
  This function abstracts the hypervisor parameter validation to be
7871
  used in both instance create and instance modify.
7872

7873
  @type lu: L{LogicalUnit}
7874
  @param lu: the logical unit for which we check
7875
  @type nodenames: list
7876
  @param nodenames: the list of nodes on which we should check
7877
  @type hvname: string
7878
  @param hvname: the name of the hypervisor we should use
7879
  @type hvparams: dict
7880
  @param hvparams: the parameters which we need to check
7881
  @raise errors.OpPrereqError: if the parameters are not valid
7882

7883
  """
7884
  nodenames = _FilterVmNodes(lu, nodenames)
7885
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7886
                                                  hvname,
7887
                                                  hvparams)
7888
  for node in nodenames:
7889
    info = hvinfo[node]
7890
    if info.offline:
7891
      continue
7892
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
7893

    
7894

    
7895
def _CheckOSParams(lu, required, nodenames, osname, osparams):
7896
  """OS parameters validation.
7897

7898
  @type lu: L{LogicalUnit}
7899
  @param lu: the logical unit for which we check
7900
  @type required: boolean
7901
  @param required: whether the validation should fail if the OS is not
7902
      found
7903
  @type nodenames: list
7904
  @param nodenames: the list of nodes on which we should check
7905
  @type osname: string
7906
  @param osname: the name of the hypervisor we should use
7907
  @type osparams: dict
7908
  @param osparams: the parameters which we need to check
7909
  @raise errors.OpPrereqError: if the parameters are not valid
7910

7911
  """
7912
  nodenames = _FilterVmNodes(lu, nodenames)
7913
  result = lu.rpc.call_os_validate(required, nodenames, osname,
7914
                                   [constants.OS_VALIDATE_PARAMETERS],
7915
                                   osparams)
7916
  for node, nres in result.items():
7917
    # we don't check for offline cases since this should be run only
7918
    # against the master node and/or an instance's nodes
7919
    nres.Raise("OS Parameters validation failed on node %s" % node)
7920
    if not nres.payload:
7921
      lu.LogInfo("OS %s not found on node %s, validation skipped",
7922
                 osname, node)
7923

    
7924

    
7925
class LUInstanceCreate(LogicalUnit):
7926
  """Create an instance.
7927

7928
  """
7929
  HPATH = "instance-add"
7930
  HTYPE = constants.HTYPE_INSTANCE
7931
  REQ_BGL = False
7932

    
7933
  def CheckArguments(self):
7934
    """Check arguments.
7935

7936
    """
7937
    # do not require name_check to ease forward/backward compatibility
7938
    # for tools
7939
    if self.op.no_install and self.op.start:
7940
      self.LogInfo("No-installation mode selected, disabling startup")
7941
      self.op.start = False
7942
    # validate/normalize the instance name
7943
    self.op.instance_name = \
7944
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7945

    
7946
    if self.op.ip_check and not self.op.name_check:
7947
      # TODO: make the ip check more flexible and not depend on the name check
7948
      raise errors.OpPrereqError("Cannot do IP address check without a name"
7949
                                 " check", errors.ECODE_INVAL)
7950

    
7951
    # check nics' parameter names
7952
    for nic in self.op.nics:
7953
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7954

    
7955
    # check disks. parameter names and consistent adopt/no-adopt strategy
7956
    has_adopt = has_no_adopt = False
7957
    for disk in self.op.disks:
7958
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7959
      if constants.IDISK_ADOPT in disk:
7960
        has_adopt = True
7961
      else:
7962
        has_no_adopt = True
7963
    if has_adopt and has_no_adopt:
7964
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7965
                                 errors.ECODE_INVAL)
7966
    if has_adopt:
7967
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7968
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7969
                                   " '%s' disk template" %
7970
                                   self.op.disk_template,
7971
                                   errors.ECODE_INVAL)
7972
      if self.op.iallocator is not None:
7973
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7974
                                   " iallocator script", errors.ECODE_INVAL)
7975
      if self.op.mode == constants.INSTANCE_IMPORT:
7976
        raise errors.OpPrereqError("Disk adoption not allowed for"
7977
                                   " instance import", errors.ECODE_INVAL)
7978
    else:
7979
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
7980
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7981
                                   " but no 'adopt' parameter given" %
7982
                                   self.op.disk_template,
7983
                                   errors.ECODE_INVAL)
7984

    
7985
    self.adopt_disks = has_adopt
7986

    
7987
    # instance name verification
7988
    if self.op.name_check:
7989
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7990
      self.op.instance_name = self.hostname1.name
7991
      # used in CheckPrereq for ip ping check
7992
      self.check_ip = self.hostname1.ip
7993
    else:
7994
      self.check_ip = None
7995

    
7996
    # file storage checks
7997
    if (self.op.file_driver and
7998
        not self.op.file_driver in constants.FILE_DRIVER):
7999
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8000
                                 self.op.file_driver, errors.ECODE_INVAL)
8001

    
8002
    if self.op.disk_template == constants.DT_FILE:
8003
      opcodes.RequireFileStorage()
8004
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8005
      opcodes.RequireSharedFileStorage()
8006

    
8007
    ### Node/iallocator related checks
8008
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8009

    
8010
    if self.op.pnode is not None:
8011
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8012
        if self.op.snode is None:
8013
          raise errors.OpPrereqError("The networked disk templates need"
8014
                                     " a mirror node", errors.ECODE_INVAL)
8015
      elif self.op.snode:
8016
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8017
                        " template")
8018
        self.op.snode = None
8019

    
8020
    self._cds = _GetClusterDomainSecret()
8021

    
8022
    if self.op.mode == constants.INSTANCE_IMPORT:
8023
      # On import force_variant must be True, because if we forced it at
8024
      # initial install, our only chance when importing it back is that it
8025
      # works again!
8026
      self.op.force_variant = True
8027

    
8028
      if self.op.no_install:
8029
        self.LogInfo("No-installation mode has no effect during import")
8030

    
8031
    elif self.op.mode == constants.INSTANCE_CREATE:
8032
      if self.op.os_type is None:
8033
        raise errors.OpPrereqError("No guest OS specified",
8034
                                   errors.ECODE_INVAL)
8035
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8036
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8037
                                   " installation" % self.op.os_type,
8038
                                   errors.ECODE_STATE)
8039
      if self.op.disk_template is None:
8040
        raise errors.OpPrereqError("No disk template specified",
8041
                                   errors.ECODE_INVAL)
8042

    
8043
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8044
      # Check handshake to ensure both clusters have the same domain secret
8045
      src_handshake = self.op.source_handshake
8046
      if not src_handshake:
8047
        raise errors.OpPrereqError("Missing source handshake",
8048
                                   errors.ECODE_INVAL)
8049

    
8050
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8051
                                                           src_handshake)
8052
      if errmsg:
8053
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8054
                                   errors.ECODE_INVAL)
8055

    
8056
      # Load and check source CA
8057
      self.source_x509_ca_pem = self.op.source_x509_ca
8058
      if not self.source_x509_ca_pem:
8059
        raise errors.OpPrereqError("Missing source X509 CA",
8060
                                   errors.ECODE_INVAL)
8061

    
8062
      try:
8063
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8064
                                                    self._cds)
8065
      except OpenSSL.crypto.Error, err:
8066
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8067
                                   (err, ), errors.ECODE_INVAL)
8068

    
8069
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8070
      if errcode is not None:
8071
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8072
                                   errors.ECODE_INVAL)
8073

    
8074
      self.source_x509_ca = cert
8075

    
8076
      src_instance_name = self.op.source_instance_name
8077
      if not src_instance_name:
8078
        raise errors.OpPrereqError("Missing source instance name",
8079
                                   errors.ECODE_INVAL)
8080

    
8081
      self.source_instance_name = \
8082
          netutils.GetHostname(name=src_instance_name).name
8083

    
8084
    else:
8085
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8086
                                 self.op.mode, errors.ECODE_INVAL)
8087

    
8088
  def ExpandNames(self):
8089
    """ExpandNames for CreateInstance.
8090

8091
    Figure out the right locks for instance creation.
8092

8093
    """
8094
    self.needed_locks = {}
8095

    
8096
    instance_name = self.op.instance_name
8097
    # this is just a preventive check, but someone might still add this
8098
    # instance in the meantime, and creation will fail at lock-add time
8099
    if instance_name in self.cfg.GetInstanceList():
8100
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8101
                                 instance_name, errors.ECODE_EXISTS)
8102

    
8103
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8104

    
8105
    if self.op.iallocator:
8106
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8107
    else:
8108
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8109
      nodelist = [self.op.pnode]
8110
      if self.op.snode is not None:
8111
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8112
        nodelist.append(self.op.snode)
8113
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8114

    
8115
    # in case of import lock the source node too
8116
    if self.op.mode == constants.INSTANCE_IMPORT:
8117
      src_node = self.op.src_node
8118
      src_path = self.op.src_path
8119

    
8120
      if src_path is None:
8121
        self.op.src_path = src_path = self.op.instance_name
8122

    
8123
      if src_node is None:
8124
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8125
        self.op.src_node = None
8126
        if os.path.isabs(src_path):
8127
          raise errors.OpPrereqError("Importing an instance from an absolute"
8128
                                     " path requires a source node option",
8129
                                     errors.ECODE_INVAL)
8130
      else:
8131
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8132
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8133
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8134
        if not os.path.isabs(src_path):
8135
          self.op.src_path = src_path = \
8136
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8137

    
8138
  def _RunAllocator(self):
8139
    """Run the allocator based on input opcode.
8140

8141
    """
8142
    nics = [n.ToDict() for n in self.nics]
8143
    ial = IAllocator(self.cfg, self.rpc,
8144
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8145
                     name=self.op.instance_name,
8146
                     disk_template=self.op.disk_template,
8147
                     tags=self.op.tags,
8148
                     os=self.op.os_type,
8149
                     vcpus=self.be_full[constants.BE_VCPUS],
8150
                     memory=self.be_full[constants.BE_MEMORY],
8151
                     disks=self.disks,
8152
                     nics=nics,
8153
                     hypervisor=self.op.hypervisor,
8154
                     )
8155

    
8156
    ial.Run(self.op.iallocator)
8157

    
8158
    if not ial.success:
8159
      raise errors.OpPrereqError("Can't compute nodes using"
8160
                                 " iallocator '%s': %s" %
8161
                                 (self.op.iallocator, ial.info),
8162
                                 errors.ECODE_NORES)
8163
    if len(ial.result) != ial.required_nodes:
8164
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8165
                                 " of nodes (%s), required %s" %
8166
                                 (self.op.iallocator, len(ial.result),
8167
                                  ial.required_nodes), errors.ECODE_FAULT)
8168
    self.op.pnode = ial.result[0]
8169
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8170
                 self.op.instance_name, self.op.iallocator,
8171
                 utils.CommaJoin(ial.result))
8172
    if ial.required_nodes == 2:
8173
      self.op.snode = ial.result[1]
8174

    
8175
  def BuildHooksEnv(self):
8176
    """Build hooks env.
8177

8178
    This runs on master, primary and secondary nodes of the instance.
8179

8180
    """
8181
    env = {
8182
      "ADD_MODE": self.op.mode,
8183
      }
8184
    if self.op.mode == constants.INSTANCE_IMPORT:
8185
      env["SRC_NODE"] = self.op.src_node
8186
      env["SRC_PATH"] = self.op.src_path
8187
      env["SRC_IMAGES"] = self.src_images
8188

    
8189
    env.update(_BuildInstanceHookEnv(
8190
      name=self.op.instance_name,
8191
      primary_node=self.op.pnode,
8192
      secondary_nodes=self.secondaries,
8193
      status=self.op.start,
8194
      os_type=self.op.os_type,
8195
      memory=self.be_full[constants.BE_MEMORY],
8196
      vcpus=self.be_full[constants.BE_VCPUS],
8197
      nics=_NICListToTuple(self, self.nics),
8198
      disk_template=self.op.disk_template,
8199
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8200
             for d in self.disks],
8201
      bep=self.be_full,
8202
      hvp=self.hv_full,
8203
      hypervisor_name=self.op.hypervisor,
8204
      tags=self.op.tags,
8205
    ))
8206

    
8207
    return env
8208

    
8209
  def BuildHooksNodes(self):
8210
    """Build hooks nodes.
8211

8212
    """
8213
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8214
    return nl, nl
8215

    
8216
  def _ReadExportInfo(self):
8217
    """Reads the export information from disk.
8218

8219
    It will override the opcode source node and path with the actual
8220
    information, if these two were not specified before.
8221

8222
    @return: the export information
8223

8224
    """
8225
    assert self.op.mode == constants.INSTANCE_IMPORT
8226

    
8227
    src_node = self.op.src_node
8228
    src_path = self.op.src_path
8229

    
8230
    if src_node is None:
8231
      locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8232
      exp_list = self.rpc.call_export_list(locked_nodes)
8233
      found = False
8234
      for node in exp_list:
8235
        if exp_list[node].fail_msg:
8236
          continue
8237
        if src_path in exp_list[node].payload:
8238
          found = True
8239
          self.op.src_node = src_node = node
8240
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8241
                                                       src_path)
8242
          break
8243
      if not found:
8244
        raise errors.OpPrereqError("No export found for relative path %s" %
8245
                                    src_path, errors.ECODE_INVAL)
8246

    
8247
    _CheckNodeOnline(self, src_node)
8248
    result = self.rpc.call_export_info(src_node, src_path)
8249
    result.Raise("No export or invalid export found in dir %s" % src_path)
8250

    
8251
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8252
    if not export_info.has_section(constants.INISECT_EXP):
8253
      raise errors.ProgrammerError("Corrupted export config",
8254
                                   errors.ECODE_ENVIRON)
8255

    
8256
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8257
    if (int(ei_version) != constants.EXPORT_VERSION):
8258
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8259
                                 (ei_version, constants.EXPORT_VERSION),
8260
                                 errors.ECODE_ENVIRON)
8261
    return export_info
8262

    
8263
  def _ReadExportParams(self, einfo):
8264
    """Use export parameters as defaults.
8265

8266
    In case the opcode doesn't specify (as in override) some instance
8267
    parameters, then try to use them from the export information, if
8268
    that declares them.
8269

8270
    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable-msg=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

8503
    # disk checks/pre-build
8504
    default_vg = self.cfg.GetVGName()
8505
    self.disks = []
8506
    for disk in self.op.disks:
8507
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8508
      if mode not in constants.DISK_ACCESS_SET:
8509
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8510
                                   mode, errors.ECODE_INVAL)
8511
      size = disk.get(constants.IDISK_SIZE, None)
8512
      if size is None:
8513
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8514
      try:
8515
        size = int(size)
8516
      except (TypeError, ValueError):
8517
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8518
                                   errors.ECODE_INVAL)
8519

    
8520
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8521
      new_disk = {
8522
        constants.IDISK_SIZE: size,
8523
        constants.IDISK_MODE: mode,
8524
        constants.IDISK_VG: data_vg,
8525
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8526
        }
8527
      if constants.IDISK_ADOPT in disk:
8528
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8529
      self.disks.append(new_disk)
8530

    
8531
    if self.op.mode == constants.INSTANCE_IMPORT:
8532

    
8533
      # Check that the new instance doesn't have less disks than the export
8534
      instance_disks = len(self.disks)
8535
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8536
      if instance_disks < export_disks:
8537
        raise errors.OpPrereqError("Not enough disks to import."
8538
                                   " (instance: %d, export: %d)" %
8539
                                   (instance_disks, export_disks),
8540
                                   errors.ECODE_INVAL)
8541

    
8542
      disk_images = []
8543
      for idx in range(export_disks):
8544
        option = "disk%d_dump" % idx
8545
        if export_info.has_option(constants.INISECT_INS, option):
8546
          # FIXME: are the old os-es, disk sizes, etc. useful?
8547
          export_name = export_info.get(constants.INISECT_INS, option)
8548
          image = utils.PathJoin(self.op.src_path, export_name)
8549
          disk_images.append(image)
8550
        else:
8551
          disk_images.append(False)
8552

    
8553
      self.src_images = disk_images
8554

    
8555
      old_name = export_info.get(constants.INISECT_INS, "name")
8556
      try:
8557
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8558
      except (TypeError, ValueError), err:
8559
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8560
                                   " an integer: %s" % str(err),
8561
                                   errors.ECODE_STATE)
8562
      if self.op.instance_name == old_name:
8563
        for idx, nic in enumerate(self.nics):
8564
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8565
            nic_mac_ini = "nic%d_mac" % idx
8566
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8567

    
8568
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8569

    
8570
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8571
    if self.op.ip_check:
8572
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8573
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8574
                                   (self.check_ip, self.op.instance_name),
8575
                                   errors.ECODE_NOTUNIQUE)
8576

    
8577
    #### mac address generation
8578
    # By generating here the mac address both the allocator and the hooks get
8579
    # the real final mac address rather than the 'auto' or 'generate' value.
8580
    # There is a race condition between the generation and the instance object
8581
    # creation, which means that we know the mac is valid now, but we're not
8582
    # sure it will be when we actually add the instance. If things go bad
8583
    # adding the instance will abort because of a duplicate mac, and the
8584
    # creation job will fail.
8585
    for nic in self.nics:
8586
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8587
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8588

    
8589
    #### allocator run
8590

    
8591
    if self.op.iallocator is not None:
8592
      self._RunAllocator()
8593

    
8594
    #### node related checks
8595

    
8596
    # check primary node
8597
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8598
    assert self.pnode is not None, \
8599
      "Cannot retrieve locked node %s" % self.op.pnode
8600
    if pnode.offline:
8601
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8602
                                 pnode.name, errors.ECODE_STATE)
8603
    if pnode.drained:
8604
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8605
                                 pnode.name, errors.ECODE_STATE)
8606
    if not pnode.vm_capable:
8607
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8608
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8609

    
8610
    self.secondaries = []
8611

    
8612
    # mirror node verification
8613
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8614
      if self.op.snode == pnode.name:
8615
        raise errors.OpPrereqError("The secondary node cannot be the"
8616
                                   " primary node", errors.ECODE_INVAL)
8617
      _CheckNodeOnline(self, self.op.snode)
8618
      _CheckNodeNotDrained(self, self.op.snode)
8619
      _CheckNodeVmCapable(self, self.op.snode)
8620
      self.secondaries.append(self.op.snode)
8621

    
8622
    nodenames = [pnode.name] + self.secondaries
8623

    
8624
    if not self.adopt_disks:
8625
      # Check lv size requirements, if not adopting
8626
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8627
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8628

    
8629
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
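      # Sketch of the adoption checks below: the given LVs must be uniquely
      # named, not reserved by another job, present on the primary node and
      # inactive; their actual sizes then override whatever was requested.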
8630
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8631
                                disk[constants.IDISK_ADOPT])
8632
                     for disk in self.disks])
8633
      if len(all_lvs) != len(self.disks):
8634
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8635
                                   errors.ECODE_INVAL)
8636
      for lv_name in all_lvs:
8637
        try:
8638
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8639
          # to ReserveLV uses the same syntax
8640
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8641
        except errors.ReservationError:
8642
          raise errors.OpPrereqError("LV named %s used by another instance" %
8643
                                     lv_name, errors.ECODE_NOTUNIQUE)
8644

    
8645
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8646
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8647

    
8648
      node_lvs = self.rpc.call_lv_list([pnode.name],
8649
                                       vg_names.payload.keys())[pnode.name]
8650
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8651
      node_lvs = node_lvs.payload
8652

    
8653
      delta = all_lvs.difference(node_lvs.keys())
8654
      if delta:
8655
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8656
                                   utils.CommaJoin(delta),
8657
                                   errors.ECODE_INVAL)
8658
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8659
      if online_lvs:
8660
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8661
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8662
                                   errors.ECODE_STATE)
8663
      # update the size of disk based on what is found
8664
      for dsk in self.disks:
8665
        dsk[constants.IDISK_SIZE] = \
8666
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8667
                                        dsk[constants.IDISK_ADOPT])][0]))
8668

    
8669
    elif self.op.disk_template == constants.DT_BLOCK:
8670
      # Normalize and de-duplicate device paths
8671
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8672
                       for disk in self.disks])
8673
      if len(all_disks) != len(self.disks):
8674
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8675
                                   errors.ECODE_INVAL)
8676
      baddisks = [d for d in all_disks
8677
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8678
      if baddisks:
8679
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8680
                                   " cannot be adopted" %
8681
                                   (", ".join(baddisks),
8682
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8683
                                   errors.ECODE_INVAL)
8684

    
8685
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8686
                                            list(all_disks))[pnode.name]
8687
      node_disks.Raise("Cannot get block device information from node %s" %
8688
                       pnode.name)
8689
      node_disks = node_disks.payload
8690
      delta = all_disks.difference(node_disks.keys())
8691
      if delta:
8692
        raise errors.OpPrereqError("Missing block device(s): %s" %
8693
                                   utils.CommaJoin(delta),
8694
                                   errors.ECODE_INVAL)
8695
      for dsk in self.disks:
8696
        dsk[constants.IDISK_SIZE] = \
8697
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8698

    
8699
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8700

    
8701
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8702
    # check OS parameters (remotely)
8703
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8704

    
8705
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8706

    
8707
    # memory check on primary node
8708
    if self.op.start:
8709
      _CheckNodeFreeMemory(self, self.pnode.name,
8710
                           "creating instance %s" % self.op.instance_name,
8711
                           self.be_full[constants.BE_MEMORY],
8712
                           self.op.hypervisor)
8713

    
8714
    self.dry_run_result = list(nodenames)
8715

    
8716
  def Exec(self, feedback_fn):
8717
    """Create and add the instance to the cluster.
8718

8719
    """
8720
    instance = self.op.instance_name
8721
    pnode_name = self.pnode.name
8722

    
8723
    ht_kind = self.op.hypervisor
8724
    if ht_kind in constants.HTS_REQ_PORT:
8725
      network_port = self.cfg.AllocatePort()
8726
    else:
8727
      network_port = None
8728

    
8729
    disks = _GenerateDiskTemplate(self,
8730
                                  self.op.disk_template,
8731
                                  instance, pnode_name,
8732
                                  self.secondaries,
8733
                                  self.disks,
8734
                                  self.instance_file_storage_dir,
8735
                                  self.op.file_driver,
8736
                                  0,
8737
                                  feedback_fn)
8738

    
8739
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8740
                            primary_node=pnode_name,
8741
                            nics=self.nics, disks=disks,
8742
                            disk_template=self.op.disk_template,
8743
                            admin_up=False,
8744
                            network_port=network_port,
8745
                            beparams=self.op.beparams,
8746
                            hvparams=self.op.hvparams,
8747
                            hypervisor=self.op.hypervisor,
8748
                            osparams=self.op.osparams,
8749
                            )
8750

    
8751
    if self.op.tags:
8752
      for tag in self.op.tags:
8753
        iobj.AddTag(tag)
8754

    
8755
    if self.adopt_disks:
8756
      if self.op.disk_template == constants.DT_PLAIN:
8757
        # rename LVs to the newly-generated names; we need to construct
8758
        # 'fake' LV disks with the old data, plus the new unique_id
8759
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8760
        rename_to = []
8761
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8762
          rename_to.append(t_dsk.logical_id)
8763
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8764
          self.cfg.SetDiskID(t_dsk, pnode_name)
8765
        result = self.rpc.call_blockdev_rename(pnode_name,
8766
                                               zip(tmp_disks, rename_to))
8767
        result.Raise("Failed to rename adoped LVs")
8768
    else:
8769
      feedback_fn("* creating instance disks...")
8770
      try:
8771
        _CreateDisks(self, iobj)
8772
      except errors.OpExecError:
8773
        self.LogWarning("Device creation failed, reverting...")
8774
        try:
8775
          _RemoveDisks(self, iobj)
8776
        finally:
8777
          self.cfg.ReleaseDRBDMinors(instance)
8778
          raise
8779

    
8780
    feedback_fn("adding instance %s to cluster config" % instance)
8781

    
8782
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8783

    
8784
    # Declare that we don't want to remove the instance lock anymore, as we've
8785
    # added the instance to the config
8786
    del self.remove_locks[locking.LEVEL_INSTANCE]
8787

    
8788
    if self.op.mode == constants.INSTANCE_IMPORT:
8789
      # Release unused nodes
8790
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8791
    else:
8792
      # Release all nodes
8793
      _ReleaseLocks(self, locking.LEVEL_NODE)
8794

    
8795
    disk_abort = False
8796
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8797
      feedback_fn("* wiping instance disks...")
8798
      try:
8799
        _WipeDisks(self, iobj)
8800
      except errors.OpExecError, err:
8801
        logging.exception("Wiping disks failed")
8802
        self.LogWarning("Wiping instance disks failed (%s)", err)
8803
        disk_abort = True
8804

    
8805
    if disk_abort:
8806
      # Something is already wrong with the disks, don't do anything else
8807
      pass
8808
    elif self.op.wait_for_sync:
8809
      disk_abort = not _WaitForSync(self, iobj)
8810
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8811
      # make sure the disks are not degraded (still sync-ing is ok)
8812
      time.sleep(15)
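      # (the fixed 15s pause presumably gives DRBD a chance to reach a steady
      # state before the single, one-shot status check below)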
8813
      feedback_fn("* checking mirrors status")
8814
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8815
    else:
8816
      disk_abort = False
8817

    
8818
    if disk_abort:
8819
      _RemoveDisks(self, iobj)
8820
      self.cfg.RemoveInstance(iobj.name)
8821
      # Make sure the instance lock gets removed
8822
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8823
      raise errors.OpExecError("There are some degraded disks for"
8824
                               " this instance")
8825

    
8826
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8827
      if self.op.mode == constants.INSTANCE_CREATE:
8828
        if not self.op.no_install:
8829
          feedback_fn("* running the instance OS create scripts...")
8830
          # FIXME: pass debug option from opcode to backend
8831
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8832
                                                 self.op.debug_level)
8833
          result.Raise("Could not add os for instance %s"
8834
                       " on node %s" % (instance, pnode_name))
8835

    
8836
      elif self.op.mode == constants.INSTANCE_IMPORT:
8837
        feedback_fn("* running the instance OS import scripts...")
8838

    
8839
        transfers = []
8840

    
8841
        for idx, image in enumerate(self.src_images):
8842
          if not image:
8843
            continue
8844

    
8845
          # FIXME: pass debug option from opcode to backend
8846
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8847
                                             constants.IEIO_FILE, (image, ),
8848
                                             constants.IEIO_SCRIPT,
8849
                                             (iobj.disks[idx], idx),
8850
                                             None)
8851
          transfers.append(dt)
8852

    
8853
        import_result = \
8854
          masterd.instance.TransferInstanceData(self, feedback_fn,
8855
                                                self.op.src_node, pnode_name,
8856
                                                self.pnode.secondary_ip,
8857
                                                iobj, transfers)
8858
        if not compat.all(import_result):
8859
          self.LogWarning("Some disks for instance %s on node %s were not"
8860
                          " imported successfully" % (instance, pnode_name))
8861

    
8862
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8863
        feedback_fn("* preparing remote import...")
8864
        # The source cluster will stop the instance before attempting to make a
8865
        # connection. In some cases stopping an instance can take a long time,
8866
        # hence the shutdown timeout is added to the connection timeout.
8867
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8868
                           self.op.source_shutdown_timeout)
8869
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8870

    
8871
        assert iobj.primary_node == self.pnode.name
8872
        disk_results = \
8873
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8874
                                        self.source_x509_ca,
8875
                                        self._cds, timeouts)
8876
        if not compat.all(disk_results):
8877
          # TODO: Should the instance still be started, even if some disks
8878
          # failed to import (valid for local imports, too)?
8879
          self.LogWarning("Some disks for instance %s on node %s were not"
8880
                          " imported successfully" % (instance, pnode_name))
8881

    
8882
        # Run rename script on newly imported instance
8883
        assert iobj.name == instance
8884
        feedback_fn("Running rename script for %s" % instance)
8885
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8886
                                                   self.source_instance_name,
8887
                                                   self.op.debug_level)
8888
        if result.fail_msg:
8889
          self.LogWarning("Failed to run rename script for %s on node"
8890
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
8891

    
8892
      else:
8893
        # also checked in the prereq part
8894
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8895
                                     % self.op.mode)
8896

    
8897
    if self.op.start:
8898
      iobj.admin_up = True
8899
      self.cfg.Update(iobj, feedback_fn)
8900
      logging.info("Starting instance %s on node %s", instance, pnode_name)
8901
      feedback_fn("* starting instance...")
8902
      result = self.rpc.call_instance_start(pnode_name, iobj,
8903
                                            None, None, False)
8904
      result.Raise("Could not start instance")
8905

    
8906
    return list(iobj.all_nodes)
8907

    
8908

    
8909
class LUInstanceConsole(NoHooksLU):
8910
  """Connect to an instance's console.
8911

8912
  This is somewhat special in that it returns the command line that
8913
  you need to run on the master node in order to connect to the
8914
  console.
8915

8916
  """
8917
  REQ_BGL = False
8918

    
8919
  def ExpandNames(self):
8920
    self._ExpandAndLockInstance()
8921

    
8922
  def CheckPrereq(self):
8923
    """Check prerequisites.
8924

8925
    This checks that the instance is in the cluster.
8926

8927
    """
8928
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8929
    assert self.instance is not None, \
8930
      "Cannot retrieve locked instance %s" % self.op.instance_name
8931
    _CheckNodeOnline(self, self.instance.primary_node)
8932

    
8933
  def Exec(self, feedback_fn):
8934
    """Connect to the console of an instance
8935

8936
    """
8937
    instance = self.instance
8938
    node = instance.primary_node
8939

    
8940
    node_insts = self.rpc.call_instance_list([node],
8941
                                             [instance.hypervisor])[node]
8942
    node_insts.Raise("Can't get node information from %s" % node)
8943

    
8944
    if instance.name not in node_insts.payload:
8945
      if instance.admin_up:
8946
        state = constants.INSTST_ERRORDOWN
8947
      else:
8948
        state = constants.INSTST_ADMINDOWN
8949
      raise errors.OpExecError("Instance %s is not running (state %s)" %
8950
                               (instance.name, state))
8951

    
8952
    logging.debug("Connecting to console of %s on %s", instance.name, node)
8953

    
8954
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
8976

    
8977

    
8978
class LUInstanceReplaceDisks(LogicalUnit):
8979
  """Replace the disks of an instance.
8980

8981
  """
8982
  HPATH = "mirrors-replace"
8983
  HTYPE = constants.HTYPE_INSTANCE
8984
  REQ_BGL = False
8985

    
8986
  def CheckArguments(self):
8987
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8988
                                  self.op.iallocator)
8989

    
8990
  def ExpandNames(self):
8991
    self._ExpandAndLockInstance()
8992

    
8993
    assert locking.LEVEL_NODE not in self.needed_locks
8994
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
8995

    
8996
    assert self.op.iallocator is None or self.op.remote_node is None, \
8997
      "Conflicting options"
8998

    
8999
    if self.op.remote_node is not None:
9000
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9001

    
9002
      # Warning: do not remove the locking of the new secondary here
9003
      # unless DRBD8.AddChildren is changed to work in parallel;
9004
      # currently it doesn't since parallel invocations of
9005
      # FindUnusedMinor will conflict
9006
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9007
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9008
    else:
9009
      self.needed_locks[locking.LEVEL_NODE] = []
9010
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9011

    
9012
      if self.op.iallocator is not None:
9013
        # iallocator will select a new node in the same group
9014
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9015

    
9016
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9017
                                   self.op.iallocator, self.op.remote_node,
9018
                                   self.op.disks, False, self.op.early_release)
9019

    
9020
    self.tasklets = [self.replacer]
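    # The real prerequisite checking and execution happen in the
    # TLReplaceDisks tasklet registered above.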
9021

    
9022
  def DeclareLocks(self, level):
9023
    if level == locking.LEVEL_NODEGROUP:
9024
      assert self.op.remote_node is None
9025
      assert self.op.iallocator is not None
9026
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9027

    
9028
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9029
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9030
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9031

    
9032
    elif level == locking.LEVEL_NODE:
9033
      if self.op.iallocator is not None:
9034
        assert self.op.remote_node is None
9035
        assert not self.needed_locks[locking.LEVEL_NODE]
9036

    
9037
        # Lock member nodes of all locked groups
9038
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9039
          for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
9040
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9041
      else:
9042
        self._LockInstancesNodes()
9043

    
9044
  def BuildHooksEnv(self):
9045
    """Build hooks env.
9046

9047
    This runs on the master, the primary and all the secondaries.
9048

9049
    """
9050
    instance = self.replacer.instance
9051
    env = {
9052
      "MODE": self.op.mode,
9053
      "NEW_SECONDARY": self.op.remote_node,
9054
      "OLD_SECONDARY": instance.secondary_nodes[0],
9055
      }
9056
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9057
    return env
9058

    
9059
  def BuildHooksNodes(self):
9060
    """Build hooks nodes.
9061

9062
    """
9063
    instance = self.replacer.instance
9064
    nl = [
9065
      self.cfg.GetMasterNode(),
9066
      instance.primary_node,
9067
      ]
9068
    if self.op.remote_node is not None:
9069
      nl.append(self.op.remote_node)
9070
    return nl, nl
9071

    
9072
  def CheckPrereq(self):
9073
    """Check prerequisites.
9074

9075
    """
9076
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9077
            self.op.iallocator is None)
9078

    
9079
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
9080
    if owned_groups:
9081
      groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9082
      if owned_groups != groups:
9083
        raise errors.OpExecError("Node groups used by instance '%s' changed"
9084
                                 " since lock was acquired, current list is %r,"
9085
                                 " used to be '%s'" %
9086
                                 (self.op.instance_name,
9087
                                  utils.CommaJoin(groups),
9088
                                  utils.CommaJoin(owned_groups)))
9089

    
9090
    return LogicalUnit.CheckPrereq(self)
9091

    
9092

    
9093
class TLReplaceDisks(Tasklet):
9094
  """Replaces disks for an instance.
9095

9096
  Note: Locking is not within the scope of this class.
9097

9098
  """
9099
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9100
               disks, delay_iallocator, early_release):
9101
    """Initializes this class.
9102

9103
    """
9104
    Tasklet.__init__(self, lu)
9105

    
9106
    # Parameters
9107
    self.instance_name = instance_name
9108
    self.mode = mode
9109
    self.iallocator_name = iallocator_name
9110
    self.remote_node = remote_node
9111
    self.disks = disks
9112
    self.delay_iallocator = delay_iallocator
9113
    self.early_release = early_release
9114

    
9115
    # Runtime data
9116
    self.instance = None
9117
    self.new_node = None
9118
    self.target_node = None
9119
    self.other_node = None
9120
    self.remote_node_info = None
9121
    self.node_secondary_ip = None
9122

    
9123
  @staticmethod
9124
  def CheckArguments(mode, remote_node, iallocator):
9125
    """Helper function for users of this class.
9126

9127
    """
9128
    # check for valid parameter combination
9129
    if mode == constants.REPLACE_DISK_CHG:
9130
      if remote_node is None and iallocator is None:
9131
        raise errors.OpPrereqError("When changing the secondary either an"
9132
                                   " iallocator script must be used or the"
9133
                                   " new node given", errors.ECODE_INVAL)
9134

    
9135
      if remote_node is not None and iallocator is not None:
9136
        raise errors.OpPrereqError("Give either the iallocator or the new"
9137
                                   " secondary, not both", errors.ECODE_INVAL)
9138

    
9139
    elif remote_node is not None or iallocator is not None:
9140
      # Not replacing the secondary
9141
      raise errors.OpPrereqError("The iallocator and new node options can"
9142
                                 " only be used when changing the"
9143
                                 " secondary node", errors.ECODE_INVAL)
9144

    
9145
  @staticmethod
9146
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9147
    """Compute a new secondary node using an IAllocator.
9148

9149
    """
9150
    ial = IAllocator(lu.cfg, lu.rpc,
9151
                     mode=constants.IALLOCATOR_MODE_RELOC,
9152
                     name=instance_name,
9153
                     relocate_from=relocate_from)
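    # In relocation mode the allocator should return exactly
    # ial.required_nodes names; this is verified below and the first entry
    # becomes the new secondary node.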
9154

    
9155
    ial.Run(iallocator_name)
9156

    
9157
    if not ial.success:
9158
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9159
                                 " %s" % (iallocator_name, ial.info),
9160
                                 errors.ECODE_NORES)
9161

    
9162
    if len(ial.result) != ial.required_nodes:
9163
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9164
                                 " of nodes (%s), required %s" %
9165
                                 (iallocator_name,
9166
                                  len(ial.result), ial.required_nodes),
9167
                                 errors.ECODE_FAULT)
9168

    
9169
    remote_node_name = ial.result[0]
9170

    
9171
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9172
               instance_name, remote_node_name)
9173

    
9174
    return remote_node_name
9175

    
9176
  def _FindFaultyDisks(self, node_name):
9177
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9178
                                    node_name, True)
9179

    
9180
  def _CheckDisksActivated(self, instance):
9181
    """Checks if the instance disks are activated.
9182

9183
    @param instance: The instance to check disks
9184
    @return: True if they are activated, False otherwise
9185

9186
    """
9187
    nodes = instance.all_nodes
9188

    
9189
    for idx, dev in enumerate(instance.disks):
9190
      for node in nodes:
9191
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9192
        self.cfg.SetDiskID(dev, node)
9193

    
9194
        result = self.rpc.call_blockdev_find(node, dev)
9195

    
9196
        if result.offline:
9197
          continue
9198
        elif result.fail_msg or not result.payload:
9199
          return False
9200

    
9201
    return True
9202

    
9203
  def CheckPrereq(self):
9204
    """Check prerequisites.
9205

9206
    This checks that the instance is in the cluster.
9207

9208
    """
9209
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9210
    assert instance is not None, \
9211
      "Cannot retrieve locked instance %s" % self.instance_name
9212

    
9213
    if instance.disk_template != constants.DT_DRBD8:
9214
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9215
                                 " instances", errors.ECODE_INVAL)
9216

    
9217
    if len(instance.secondary_nodes) != 1:
9218
      raise errors.OpPrereqError("The instance has a strange layout,"
9219
                                 " expected one secondary but found %d" %
9220
                                 len(instance.secondary_nodes),
9221
                                 errors.ECODE_FAULT)
9222

    
9223
    if not self.delay_iallocator:
9224
      self._CheckPrereq2()
9225

    
9226
  def _CheckPrereq2(self):
9227
    """Check prerequisites, second part.
9228

9229
    This function should always be part of CheckPrereq. It was separated and is
9230
    now called from Exec because during node evacuation iallocator was only
9231
    called with an unmodified cluster model, not taking planned changes into
9232
    account.
9233

9234
    """
9235
    instance = self.instance
9236
    secondary_node = instance.secondary_nodes[0]
9237

    
9238
    if self.iallocator_name is None:
9239
      remote_node = self.remote_node
9240
    else:
9241
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9242
                                       instance.name, instance.secondary_nodes)
9243

    
9244
    if remote_node is None:
9245
      self.remote_node_info = None
9246
    else:
9247
      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
9248
             "Remote node '%s' is not locked" % remote_node
9249

    
9250
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9251
      assert self.remote_node_info is not None, \
9252
        "Cannot retrieve locked node %s" % remote_node
9253

    
9254
    if remote_node == self.instance.primary_node:
9255
      raise errors.OpPrereqError("The specified node is the primary node of"
9256
                                 " the instance", errors.ECODE_INVAL)
9257

    
9258
    if remote_node == secondary_node:
9259
      raise errors.OpPrereqError("The specified node is already the"
9260
                                 " secondary node of the instance",
9261
                                 errors.ECODE_INVAL)
9262

    
9263
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9264
                                    constants.REPLACE_DISK_CHG):
9265
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9266
                                 errors.ECODE_INVAL)
9267

    
9268
    if self.mode == constants.REPLACE_DISK_AUTO:
9269
      if not self._CheckDisksActivated(instance):
9270
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9271
                                   " first" % self.instance_name,
9272
                                   errors.ECODE_STATE)
9273
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9274
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9275

    
9276
      if faulty_primary and faulty_secondary:
9277
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9278
                                   " one node and can not be repaired"
9279
                                   " automatically" % self.instance_name,
9280
                                   errors.ECODE_STATE)
9281

    
9282
      if faulty_primary:
9283
        self.disks = faulty_primary
9284
        self.target_node = instance.primary_node
9285
        self.other_node = secondary_node
9286
        check_nodes = [self.target_node, self.other_node]
9287
      elif faulty_secondary:
9288
        self.disks = faulty_secondary
9289
        self.target_node = secondary_node
9290
        self.other_node = instance.primary_node
9291
        check_nodes = [self.target_node, self.other_node]
9292
      else:
9293
        self.disks = []
9294
        check_nodes = []
9295

    
9296
    else:
9297
      # Non-automatic modes
9298
      if self.mode == constants.REPLACE_DISK_PRI:
9299
        self.target_node = instance.primary_node
9300
        self.other_node = secondary_node
9301
        check_nodes = [self.target_node, self.other_node]
9302

    
9303
      elif self.mode == constants.REPLACE_DISK_SEC:
9304
        self.target_node = secondary_node
9305
        self.other_node = instance.primary_node
9306
        check_nodes = [self.target_node, self.other_node]
9307

    
9308
      elif self.mode == constants.REPLACE_DISK_CHG:
9309
        self.new_node = remote_node
9310
        self.other_node = instance.primary_node
9311
        self.target_node = secondary_node
9312
        check_nodes = [self.new_node, self.other_node]
9313

    
9314
        _CheckNodeNotDrained(self.lu, remote_node)
9315
        _CheckNodeVmCapable(self.lu, remote_node)
9316

    
9317
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9318
        assert old_node_info is not None
9319
        if old_node_info.offline and not self.early_release:
9320
          # doesn't make sense to delay the release
9321
          self.early_release = True
9322
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9323
                          " early-release mode", secondary_node)
9324

    
9325
      else:
9326
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9327
                                     self.mode)
9328

    
9329
      # If not specified all disks should be replaced
9330
      if not self.disks:
9331
        self.disks = range(len(self.instance.disks))
9332

    
9333
    for node in check_nodes:
9334
      _CheckNodeOnline(self.lu, node)
9335

    
9336
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9337
                                                          self.other_node,
9338
                                                          self.target_node]
9339
                              if node_name is not None)
9340

    
9341
    # Release unneeded node locks
9342
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9343

    
9344
    # Release any owned node group
9345
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9346
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9347

    
9348
    # Check whether disks are valid
9349
    for disk_idx in self.disks:
9350
      instance.FindDisk(disk_idx)
9351

    
9352
    # Get secondary node IP addresses
9353
    self.node_secondary_ip = \
9354
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
9355
           for node_name in touched_nodes)
9356

    
9357
  def Exec(self, feedback_fn):
9358
    """Execute disk replacement.
9359

9360
    This dispatches the disk replacement to the appropriate handler.
9361

9362
    """
9363
    if self.delay_iallocator:
9364
      self._CheckPrereq2()
9365

    
9366
    if __debug__:
9367
      # Verify owned locks before starting operation
9368
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9369
      assert set(owned_locks) == set(self.node_secondary_ip), \
9370
          ("Incorrect node locks, owning %s, expected %s" %
9371
           (owned_locks, self.node_secondary_ip.keys()))
9372

    
9373
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9374
      assert list(owned_locks) == [self.instance_name], \
9375
          "Instance '%s' not locked" % self.instance_name
9376

    
9377
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9378
          "Should not own any node group lock at this point"
9379

    
9380
    if not self.disks:
9381
      feedback_fn("No disks need replacement")
9382
      return
9383

    
9384
    feedback_fn("Replacing disk(s) %s for %s" %
9385
                (utils.CommaJoin(self.disks), self.instance.name))
9386

    
9387
    activate_disks = (not self.instance.admin_up)
9388

    
9389
    # Activate the instance disks if we're replacing them on a down instance
9390
    if activate_disks:
9391
      _StartInstanceDisks(self.lu, self.instance, True)
9392

    
9393
    try:
9394
      # Should we replace the secondary node?
9395
      if self.new_node is not None:
9396
        fn = self._ExecDrbd8Secondary
9397
      else:
9398
        fn = self._ExecDrbd8DiskOnly
9399

    
9400
      result = fn(feedback_fn)
9401
    finally:
9402
      # Deactivate the instance disks if we're replacing them on a
9403
      # down instance
9404
      if activate_disks:
9405
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9406

    
9407
    if __debug__:
9408
      # Verify owned locks
9409
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9410
      nodes = frozenset(self.node_secondary_ip)
9411
      assert ((self.early_release and not owned_locks) or
9412
              (not self.early_release and not (set(owned_locks) - nodes))), \
9413
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9414
         " nodes=%r" % (self.early_release, owned_locks, nodes))
9415

    
9416
    return result
9417

    
9418
  def _CheckVolumeGroup(self, nodes):
9419
    self.lu.LogInfo("Checking volume groups")
9420

    
9421
    vgname = self.cfg.GetVGName()
9422

    
9423
    # Make sure volume group exists on all involved nodes
9424
    results = self.rpc.call_vg_list(nodes)
9425
    if not results:
9426
      raise errors.OpExecError("Can't list volume groups on the nodes")
9427

    
9428
    for node in nodes:
9429
      res = results[node]
9430
      res.Raise("Error checking node %s" % node)
9431
      if vgname not in res.payload:
9432
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9433
                                 (vgname, node))
9434

    
9435
  def _CheckDisksExistence(self, nodes):
9436
    # Check disk existence
9437
    for idx, dev in enumerate(self.instance.disks):
9438
      if idx not in self.disks:
9439
        continue
9440

    
9441
      for node in nodes:
9442
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9443
        self.cfg.SetDiskID(dev, node)
9444

    
9445
        result = self.rpc.call_blockdev_find(node, dev)
9446

    
9447
        msg = result.fail_msg
9448
        if msg or not result.payload:
9449
          if not msg:
9450
            msg = "disk not found"
9451
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9452
                                   (idx, node, msg))
9453

    
9454
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9455
    for idx, dev in enumerate(self.instance.disks):
9456
      if idx not in self.disks:
9457
        continue
9458

    
9459
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9460
                      (idx, node_name))
9461

    
9462
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9463
                                   ldisk=ldisk):
9464
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9465
                                 " replace disks for instance %s" %
9466
                                 (node_name, self.instance.name))
9467

    
9468
  def _CreateNewStorage(self, node_name):
9469
    """Create new storage on the primary or secondary node.
9470

9471
    This is only used for same-node replaces, not for changing the
9472
    secondary node, hence we don't want to modify the existing disk.
9473

9474
    """
9475
    iv_names = {}
9476

    
9477
    for idx, dev in enumerate(self.instance.disks):
9478
      if idx not in self.disks:
9479
        continue
9480

    
9481
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9482

    
9483
      self.cfg.SetDiskID(dev, node_name)
9484

    
9485
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9486
      names = _GenerateUniqueNames(self.lu, lv_names)
9487

    
9488
      vg_data = dev.children[0].logical_id[0]
9489
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9490
                             logical_id=(vg_data, names[0]))
9491
      vg_meta = dev.children[1].logical_id[0]
9492
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9493
                             logical_id=(vg_meta, names[1]))
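      # note: the meta LV gets a fixed size of 128 (MiB, assuming the usual
      # Ganeti disk size unit), the conventional DRBD8 metadata volume size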
9494

    
9495
      new_lvs = [lv_data, lv_meta]
9496
      old_lvs = [child.Copy() for child in dev.children]
9497
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9498

    
9499
      # we pass force_create=True to force the LVM creation
9500
      for new_lv in new_lvs:
9501
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9502
                        _GetInstanceInfoText(self.instance), False)
9503

    
9504
    return iv_names
9505

    
9506
  def _CheckDevices(self, node_name, iv_names):
9507
    for name, (dev, _, _) in iv_names.iteritems():
9508
      self.cfg.SetDiskID(dev, node_name)
9509

    
9510
      result = self.rpc.call_blockdev_find(node_name, dev)
9511

    
9512
      msg = result.fail_msg
9513
      if msg or not result.payload:
9514
        if not msg:
9515
          msg = "disk not found"
9516
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9517
                                 (name, msg))
9518

    
9519
      if result.payload.is_degraded:
9520
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9521

    
9522
  def _RemoveOldStorage(self, node_name, iv_names):
9523
    for name, (_, old_lvs, _) in iv_names.iteritems():
9524
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9525

    
9526
      for lv in old_lvs:
9527
        self.cfg.SetDiskID(lv, node_name)
9528

    
9529
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9530
        if msg:
9531
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9532
                             hint="remove unused LVs manually")
9533

    
9534
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9535
    """Replace a disk on the primary or secondary for DRBD 8.
9536

9537
    The algorithm for replace is quite complicated:
9538

9539
      1. for each disk to be replaced:
9540

9541
        1. create new LVs on the target node with unique names
9542
        1. detach old LVs from the drbd device
9543
        1. rename old LVs to name_replaced.<time_t>
9544
        1. rename new LVs to old LVs
9545
        1. attach the new LVs (with the old names now) to the drbd device
9546

9547
      1. wait for sync across all devices
9548

9549
      1. for each modified disk:
9550

9551
        1. remove old LVs (which have the name name_replaced.<time_t>)
9552

9553
    Failures are not very well handled.
9554

9555
    """
9556
    steps_total = 6
9557

    
9558
    # Step: check device activation
9559
    self.lu.LogStep(1, steps_total, "Check device existence")
9560
    self._CheckDisksExistence([self.other_node, self.target_node])
9561
    self._CheckVolumeGroup([self.target_node, self.other_node])
9562

    
9563
    # Step: check other node consistency
9564
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9565
    self._CheckDisksConsistency(self.other_node,
9566
                                self.other_node == self.instance.primary_node,
9567
                                False)
9568

    
9569
    # Step: create new storage
9570
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9571
    iv_names = self._CreateNewStorage(self.target_node)
9572

    
9573
    # Step: for each lv, detach+rename*2+attach
9574
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9575
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9576
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9577

    
9578
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9579
                                                     old_lvs)
9580
      result.Raise("Can't detach drbd from local storage on node"
9581
                   " %s for device %s" % (self.target_node, dev.iv_name))
9582
      #dev.children = []
9583
      #cfg.Update(instance)
9584

    
9585
      # ok, we created the new LVs, so now we know we have the needed
9586
      # storage; as such, we proceed on the target node to rename
9587
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9588
      # using the assumption that logical_id == physical_id (which in
9589
      # turn is the unique_id on that node)
9590

    
9591
      # FIXME(iustin): use a better name for the replaced LVs
9592
      temp_suffix = int(time.time())
9593
      ren_fn = lambda d, suff: (d.physical_id[0],
9594
                                d.physical_id[1] + "_replaced-%s" % suff)
9595

    
9596
      # Build the rename list based on what LVs exist on the node
9597
      rename_old_to_new = []
9598
      for to_ren in old_lvs:
9599
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9600
        if not result.fail_msg and result.payload:
9601
          # device exists
9602
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9603

    
9604
      self.lu.LogInfo("Renaming the old LVs on the target node")
9605
      result = self.rpc.call_blockdev_rename(self.target_node,
9606
                                             rename_old_to_new)
9607
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9608

    
9609
      # Now we rename the new LVs to the old LVs
9610
      self.lu.LogInfo("Renaming the new LVs on the target node")
9611
      rename_new_to_old = [(new, old.physical_id)
9612
                           for old, new in zip(old_lvs, new_lvs)]
9613
      result = self.rpc.call_blockdev_rename(self.target_node,
9614
                                             rename_new_to_old)
9615
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9616

    
9617
      # Intermediate steps of in memory modifications
9618
      for old, new in zip(old_lvs, new_lvs):
9619
        new.logical_id = old.logical_id
9620
        self.cfg.SetDiskID(new, self.target_node)
9621

    
9622
      # We need to modify old_lvs so that removal later removes the
9623
      # right LVs, not the newly added ones; note that old_lvs is a
9624
      # copy here
9625
      for disk in old_lvs:
9626
        disk.logical_id = ren_fn(disk, temp_suffix)
9627
        self.cfg.SetDiskID(disk, self.target_node)
9628

    
9629
      # Now that the new lvs have the old name, we can add them to the device
9630
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9631
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9632
                                                  new_lvs)
9633
      msg = result.fail_msg
9634
      if msg:
9635
        for new_lv in new_lvs:
9636
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9637
                                               new_lv).fail_msg
9638
          if msg2:
9639
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9640
                               hint=("cleanup manually the unused logical"
9641
                                     "volumes"))
9642
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9643

    
9644
    cstep = 5
9645
    if self.early_release:
9646
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9647
      cstep += 1
9648
      self._RemoveOldStorage(self.target_node, iv_names)
9649
      # WARNING: we release both node locks here, do not do other RPCs
9650
      # than WaitForSync to the primary node
9651
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9652
                    names=[self.target_node, self.other_node])
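      # Rationale (sketch): with early_release the old LVs are already gone
      # and the node locks are dropped, so other jobs may use these nodes
      # while DRBD resyncs in the background; since the locks are no longer
      # held, only the WaitForSync polling against the primary node is safe
      # from here on.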
9653

    
9654
    # Wait for sync
9655
    # This can fail as the old devices are degraded and _WaitForSync
9656
    # does a combined result over all disks, so we don't check its return value
9657
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9658
    cstep += 1
9659
    _WaitForSync(self.lu, self.instance)
9660

    
9661
    # Check all devices manually
9662
    self._CheckDevices(self.instance.primary_node, iv_names)
9663

    
9664
    # Step: remove old storage
9665
    if not self.early_release:
9666
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9667
      cstep += 1
9668
      self._RemoveOldStorage(self.target_node, iv_names)
9669

    
9670
  def _ExecDrbd8Secondary(self, feedback_fn):
9671
    """Replace the secondary node for DRBD 8.
9672

9673
    The algorithm for replace is quite complicated:
9674
      - for all disks of the instance:
9675
        - create new LVs on the new node with same names
9676
        - shutdown the drbd device on the old secondary
9677
        - disconnect the drbd network on the primary
9678
        - create the drbd device on the new secondary
9679
        - network attach the drbd on the primary, using an artifice:
9680
          the drbd code for Attach() will connect to the network if it
9681
          finds a device which is connected to the good local disks but
9682
          not network enabled
9683
      - wait for sync across all devices
9684
      - remove all disks from the old secondary
9685

9686
    Failures are not very well handled.
9687

9688
    """
9689
    steps_total = 6
9690

    
9691
    # Step: check device activation
9692
    self.lu.LogStep(1, steps_total, "Check device existence")
9693
    self._CheckDisksExistence([self.instance.primary_node])
9694
    self._CheckVolumeGroup([self.instance.primary_node])
9695

    
9696
    # Step: check other node consistency
9697
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9698
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9699

    
9700
    # Step: create new storage
9701
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9702
    for idx, dev in enumerate(self.instance.disks):
9703
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9704
                      (self.new_node, idx))
9705
      # we pass force_create=True to force LVM creation
9706
      for new_lv in dev.children:
9707
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9708
                        _GetInstanceInfoText(self.instance), False)
9709

    
9710
    # Step 4: drbd minors and drbd setup changes
9711
    # after this, we must manually remove the drbd minors on both the
9712
    # error and the success paths
9713
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9714
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9715
                                         for dev in self.instance.disks],
9716
                                        self.instance.name)
9717
    logging.debug("Allocated minors %r", minors)
9718

    
9719
    iv_names = {}
9720
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9721
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9722
                      (self.new_node, idx))
9723
      # create new devices on new_node; note that we create two IDs:
9724
      # one without port, so the drbd will be activated without
9725
      # networking information on the new node at this stage, and one
9726
      # with network, for the latter activation in step 4
9727
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9728
      if self.instance.primary_node == o_node1:
9729
        p_minor = o_minor1
9730
      else:
9731
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9732
        p_minor = o_minor2
9733

    
9734
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9735
                      p_minor, new_minor, o_secret)
9736
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9737
                    p_minor, new_minor, o_secret)
9738

    
9739
      iv_names[idx] = (dev, dev.children, new_net_id)
9740
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9741
                    new_net_id)
9742
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9743
                              logical_id=new_alone_id,
9744
                              children=dev.children,
9745
                              size=dev.size)
9746
      try:
9747
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9748
                              _GetInstanceInfoText(self.instance), False)
9749
      except errors.GenericError:
9750
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9751
        raise
9752

    
9753
    # We have new devices, shutdown the drbd on the old secondary
9754
    for idx, dev in enumerate(self.instance.disks):
9755
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9756
      self.cfg.SetDiskID(dev, self.target_node)
9757
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9758
      if msg:
9759
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9760
                           "node: %s" % (idx, msg),
9761
                           hint=("Please cleanup this device manually as"
9762
                                 " soon as possible"))
9763

    
9764
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9765
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9766
                                               self.node_secondary_ip,
9767
                                               self.instance.disks)\
9768
                                              [self.instance.primary_node]
9769

    
9770
    msg = result.fail_msg
9771
    if msg:
9772
      # detaches didn't succeed (unlikely)
9773
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9774
      raise errors.OpExecError("Can't detach the disks from the network on"
9775
                               " old node: %s" % (msg,))
9776

    
9777
    # if we managed to detach at least one, we update all the disks of
9778
    # the instance to point to the new secondary
9779
    self.lu.LogInfo("Updating instance configuration")
9780
    for dev, _, new_logical_id in iv_names.itervalues():
9781
      dev.logical_id = new_logical_id
9782
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9783

    
9784
    self.cfg.Update(self.instance, feedback_fn)
9785

    
9786
    # and now perform the drbd attach
9787
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9788
                    " (standalone => connected)")
9789
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9790
                                            self.new_node],
9791
                                           self.node_secondary_ip,
9792
                                           self.instance.disks,
9793
                                           self.instance.name,
9794
                                           False)
9795
    for to_node, to_result in result.items():
9796
      msg = to_result.fail_msg
9797
      if msg:
9798
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9799
                           to_node, msg,
9800
                           hint=("please do a gnt-instance info to see the"
9801
                                 " status of disks"))
9802
    cstep = 5
9803
    if self.early_release:
9804
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9805
      cstep += 1
9806
      self._RemoveOldStorage(self.target_node, iv_names)
9807
      # WARNING: we release all node locks here, do not do other RPCs
9808
      # than WaitForSync to the primary node
9809
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9810
                    names=[self.instance.primary_node,
9811
                           self.target_node,
9812
                           self.new_node])
9813

    
9814
    # Wait for sync
9815
    # This can fail as the old devices are degraded and _WaitForSync
9816
    # does a combined result over all disks, so we don't check its return value
9817
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9818
    cstep += 1
9819
    _WaitForSync(self.lu, self.instance)
9820

    
9821
    # Check all devices manually
9822
    self._CheckDevices(self.instance.primary_node, iv_names)
9823

    
9824
    # Step: remove old storage
9825
    if not self.early_release:
9826
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9827
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
9831
  """Repairs the volume group on a node.
9832

9833
  """
9834
  REQ_BGL = False
9835

    
9836
  def CheckArguments(self):
9837
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9838

    
9839
    storage_type = self.op.storage_type
9840

    
9841
    if (constants.SO_FIX_CONSISTENCY not in
9842
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9843
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
9844
                                 " repaired" % storage_type,
9845
                                 errors.ECODE_INVAL)
9846

    
9847
  def ExpandNames(self):
9848
    self.needed_locks = {
9849
      locking.LEVEL_NODE: [self.op.node_name],
9850
      }
9851

    
9852
  def _CheckFaultyDisks(self, instance, node_name):
9853
    """Ensure faulty disks abort the opcode or at least warn."""
9854
    try:
9855
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9856
                                  node_name, True):
9857
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9858
                                   " node '%s'" % (instance.name, node_name),
9859
                                   errors.ECODE_STATE)
9860
    except errors.OpPrereqError, err:
9861
      if self.op.ignore_consistency:
9862
        self.proc.LogWarning(str(err.args[0]))
9863
      else:
9864
        raise
9865

    
9866
  def CheckPrereq(self):
9867
    """Check prerequisites.
9868

9869
    """
9870
    # Check whether any instance on this node has faulty disks
9871
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9872
      if not inst.admin_up:
9873
        continue
9874
      check_nodes = set(inst.all_nodes)
9875
      check_nodes.discard(self.op.node_name)
9876
      for inst_node_name in check_nodes:
9877
        self._CheckFaultyDisks(inst, inst_node_name)
9878

    
9879
  def Exec(self, feedback_fn):
9880
    feedback_fn("Repairing storage unit '%s' on %s ..." %
9881
                (self.op.name, self.op.node_name))
9882

    
9883
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9884
    result = self.rpc.call_storage_execute(self.op.node_name,
9885
                                           self.op.storage_type, st_args,
9886
                                           self.op.name,
9887
                                           constants.SO_FIX_CONSISTENCY)
9888
    result.Raise("Failed to repair storage unit '%s' on %s" %
9889
                 (self.op.name, self.op.node_name))


class LUNodeEvacuate(NoHooksLU):
9893
  """Evacuates instances off a list of nodes.
9894

9895
  """
9896
  REQ_BGL = False
9897

    
9898
  def CheckArguments(self):
9899
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9900

    
9901
  def ExpandNames(self):
9902
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9903

    
9904
    if self.op.remote_node is not None:
9905
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9906
      assert self.op.remote_node
9907

    
9908
      if self.op.remote_node == self.op.node_name:
9909
        raise errors.OpPrereqError("Can not use evacuated node as a new"
9910
                                   " secondary node", errors.ECODE_INVAL)
9911

    
9912
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
9913
        raise errors.OpPrereqError("Without the use of an iallocator only"
9914
                                   " secondary instances can be evacuated",
9915
                                   errors.ECODE_INVAL)
9916

    
9917
    # Declare locks
9918
    self.share_locks = _ShareAll()
9919
    self.needed_locks = {
9920
      locking.LEVEL_INSTANCE: [],
9921
      locking.LEVEL_NODEGROUP: [],
9922
      locking.LEVEL_NODE: [],
9923
      }
9924

    
9925
    if self.op.remote_node is None:
9926
      # Iallocator will choose any node(s) in the same group
9927
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
9928
    else:
9929
      group_nodes = frozenset([self.op.remote_node])
9930

    
9931
    # Determine nodes to be locked
9932
    self.lock_nodes = set([self.op.node_name]) | group_nodes
9933

    
9934
  def _DetermineInstances(self):
9935
    """Builds list of instances to operate on.
9936

9937
    """
9938
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
9939

    
9940
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
9941
      # Primary instances only
9942
      inst_fn = _GetNodePrimaryInstances
9943
      assert self.op.remote_node is None, \
9944
        "Evacuating primary instances requires iallocator"
9945
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
9946
      # Secondary instances only
9947
      inst_fn = _GetNodeSecondaryInstances
9948
    else:
9949
      # All instances
9950
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
9951
      inst_fn = _GetNodeInstances
9952

    
9953
    return inst_fn(self.cfg, self.op.node_name)
9954

    
9955
  def DeclareLocks(self, level):
9956
    if level == locking.LEVEL_INSTANCE:
9957
      # Lock instances optimistically, needs verification once node and group
9958
      # locks have been acquired
9959
      self.needed_locks[locking.LEVEL_INSTANCE] = \
9960
        set(i.name for i in self._DetermineInstances())
9961

    
9962
    elif level == locking.LEVEL_NODEGROUP:
9963
      # Lock node groups optimistically, needs verification once nodes have
9964
      # been acquired
9965
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9966
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
9967

    
9968
    elif level == locking.LEVEL_NODE:
9969
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
9970

    
9971
  def CheckPrereq(self):
9972
    # Verify locks
9973
    owned_instances = self.glm.list_owned(locking.LEVEL_INSTANCE)
9974
    owned_nodes = self.glm.list_owned(locking.LEVEL_NODE)
9975
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
9976

    
9977
    assert owned_nodes == self.lock_nodes
9978

    
9979
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
9980
    if owned_groups != wanted_groups:
9981
      raise errors.OpExecError("Node groups changed since locks were acquired,"
9982
                               " current groups are '%s', used to be '%s'" %
9983
                               (utils.CommaJoin(wanted_groups),
9984
                                utils.CommaJoin(owned_groups)))
9985

    
9986
    # Determine affected instances
9987
    self.instances = self._DetermineInstances()
9988
    self.instance_names = [i.name for i in self.instances]
9989

    
9990
    if set(self.instance_names) != owned_instances:
9991
      raise errors.OpExecError("Instances on node '%s' changed since locks"
9992
                               " were acquired, current instances are '%s',"
9993
                               " used to be '%s'" %
9994
                               (self.op.node_name,
9995
                                utils.CommaJoin(self.instance_names),
9996
                                utils.CommaJoin(owned_instances)))
9997

    
9998
    if self.instance_names:
9999
      self.LogInfo("Evacuating instances from node '%s': %s",
10000
                   self.op.node_name,
10001
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10002
    else:
10003
      self.LogInfo("No instances to evacuate from node '%s'",
10004
                   self.op.node_name)
10005

    
10006
    if self.op.remote_node is not None:
10007
      for i in self.instances:
10008
        if i.primary_node == self.op.remote_node:
10009
          raise errors.OpPrereqError("Node %s is the primary node of"
10010
                                     " instance %s, cannot use it as"
10011
                                     " secondary" %
10012
                                     (self.op.remote_node, i.name),
10013
                                     errors.ECODE_INVAL)
10014

    
10015
  def Exec(self, feedback_fn):
10016
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10017

    
10018
    if not self.instance_names:
10019
      # No instances to evacuate
10020
      jobs = []
10021

    
10022
    elif self.op.iallocator is not None:
10023
      # TODO: Implement relocation to other group
10024
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10025
                       evac_mode=self.op.mode,
10026
                       instances=list(self.instance_names))
10027

    
10028
      ial.Run(self.op.iallocator)
10029

    
10030
      if not ial.success:
10031
        raise errors.OpPrereqError("Can't compute node evacuation using"
10032
                                   " iallocator '%s': %s" %
10033
                                   (self.op.iallocator, ial.info),
10034
                                   errors.ECODE_NORES)
10035

    
10036
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10037

    
10038
    elif self.op.remote_node is not None:
10039
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10040
      jobs = [
10041
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10042
                                        remote_node=self.op.remote_node,
10043
                                        disks=[],
10044
                                        mode=constants.REPLACE_DISK_CHG,
10045
                                        early_release=self.op.early_release)]
10046
        for instance_name in self.instance_names
10047
        ]
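      # Each instance becomes its own single-opcode job, so a failed
      # replace-disks does not block evacuating the remaining instances;
      # the jobs are submitted via the ResultWithJobs return value below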
10048

    
10049
    else:
10050
      raise errors.ProgrammerError("No iallocator or remote node")
10051

    
10052
    return ResultWithJobs(jobs)


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op
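
# Illustrative usage of _SetOpEarlyRelease (the opcode below is hypothetical
# and not built anywhere at this point):
#   op = opcodes.OpInstanceReplaceDisks(instance_name="inst1", disks=[],
#                                       mode=constants.REPLACE_DISK_CHG)
#   op = _SetOpEarlyRelease(True, op)  # sets op.early_release = True
# Opcodes that do not define an early_release slot are returned unchanged.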


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group
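

# Illustrative only and never called by Ganeti code: a minimal usage sketch
# for _NodeEvacDest; the node and group names below are hypothetical.
def _ExampleNodeEvacDest():
  """Hypothetical usage sketch for L{_NodeEvacDest}.

  """
  # With use_nodes=True the individual target nodes are joined into a
  # comma-separated string...
  nodes_text = _NodeEvacDest(True, "group1", ["node2", "node3"])
  # ...otherwise only the target group name is reported
  group_text = _NodeEvacDest(False, "group1", ["node2", "node3"])
  return (nodes_text, group_text)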


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10078
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10079

10080
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10081
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10082

10083
  @type lu: L{LogicalUnit}
10084
  @param lu: Logical unit instance
10085
  @type alloc_result: tuple/list
10086
  @param alloc_result: Result from iallocator
10087
  @type early_release: bool
10088
  @param early_release: Whether to release locks early if possible
10089
  @type use_nodes: bool
10090
  @param use_nodes: Whether to display node names instead of groups
10091

10092
  """
10093
  (moved, failed, jobs) = alloc_result
10094

    
10095
  if failed:
10096
    lu.LogWarning("Unable to evacuate instances %s",
10097
                  utils.CommaJoin("%s (%s)" % (name, reason)
10098
                                  for (name, reason) in failed))
10099

    
10100
  if moved:
10101
    lu.LogInfo("Instances to be moved: %s",
10102
               utils.CommaJoin("%s (to %s)" %
10103
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10104
                               for (name, group, nodes) in moved))
10105

    
10106
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10107
              map(opcodes.OpCode.LoadOpCode, ops))
10108
          for ops in jobs]


class LUInstanceGrowDisk(LogicalUnit):
10112
  """Grow a disk of an instance.
10113

10114
  """
10115
  HPATH = "disk-grow"
10116
  HTYPE = constants.HTYPE_INSTANCE
10117
  REQ_BGL = False
10118

    
10119
  def ExpandNames(self):
10120
    self._ExpandAndLockInstance()
10121
    self.needed_locks[locking.LEVEL_NODE] = []
10122
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10123

    
10124
  def DeclareLocks(self, level):
10125
    if level == locking.LEVEL_NODE:
10126
      self._LockInstancesNodes()
10127

    
10128
  def BuildHooksEnv(self):
10129
    """Build hooks env.
10130

10131
    This runs on the master, the primary and all the secondaries.
10132

10133
    """
10134
    env = {
10135
      "DISK": self.op.disk,
10136
      "AMOUNT": self.op.amount,
10137
      }
10138
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10139
    return env
10140

    
10141
  def BuildHooksNodes(self):
10142
    """Build hooks nodes.
10143

10144
    """
10145
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10146
    return (nl, nl)
10147

    
10148
  def CheckPrereq(self):
10149
    """Check prerequisites.
10150

10151
    This checks that the instance is in the cluster.
10152

10153
    """
10154
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10155
    assert instance is not None, \
10156
      "Cannot retrieve locked instance %s" % self.op.instance_name
10157
    nodenames = list(instance.all_nodes)
10158
    for node in nodenames:
10159
      _CheckNodeOnline(self, node)
10160

    
10161
    self.instance = instance
10162

    
10163
    if instance.disk_template not in constants.DTS_GROWABLE:
10164
      raise errors.OpPrereqError("Instance's disk layout does not support"
10165
                                 " growing", errors.ECODE_INVAL)
10166

    
10167
    self.disk = instance.FindDisk(self.op.disk)
10168

    
10169
    if instance.disk_template not in (constants.DT_FILE,
10170
                                      constants.DT_SHARED_FILE):
10171
      # TODO: check the free disk space for file, when that feature will be
10172
      # supported
10173
      _CheckNodesFreeDiskPerVG(self, nodenames,
10174
                               self.disk.ComputeGrowth(self.op.amount))
10175

    
10176
  def Exec(self, feedback_fn):
10177
    """Execute disk grow.
10178

10179
    """
10180
    instance = self.instance
10181
    disk = self.disk
10182

    
10183
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10184
    if not disks_ok:
10185
      raise errors.OpExecError("Cannot activate block device to grow")
10186

    
10187
    # First run all grow ops in dry-run mode
10188
    for node in instance.all_nodes:
10189
      self.cfg.SetDiskID(disk, node)
10190
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10191
      result.Raise("Grow request failed to node %s" % node)
10192

    
10193
    # We know that (as far as we can test) operations across different
10194
    # nodes will succeed, time to run it for real
10195
    for node in instance.all_nodes:
10196
      self.cfg.SetDiskID(disk, node)
10197
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10198
      result.Raise("Grow request failed to node %s" % node)
10199

    
10200
      # TODO: Rewrite code to work properly
10201
      # DRBD goes into sync mode for a short amount of time after executing the
10202
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10203
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10204
      # time is a work-around.
10205
      time.sleep(5)
10206

    
10207
    disk.RecordGrow(self.op.amount)
10208
    self.cfg.Update(instance, feedback_fn)
10209
    if self.op.wait_for_sync:
10210
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10211
      if disk_abort:
10212
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10213
                             " status; please check the instance")
10214
      if not instance.admin_up:
10215
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10216
    elif not instance.admin_up:
10217
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10218
                           " not supposed to be running because no wait for"
10219
                           " sync mode was requested")


class LUInstanceQueryData(NoHooksLU):
10223
  """Query runtime instance data.
10224

10225
  """
10226
  REQ_BGL = False
10227

    
10228
  def ExpandNames(self):
10229
    self.needed_locks = {}
10230

    
10231
    # Use locking if requested or when non-static information is wanted
10232
    if not (self.op.static or self.op.use_locking):
10233
      self.LogWarning("Non-static data requested, locks need to be acquired")
10234
      self.op.use_locking = True
10235

    
10236
    if self.op.instances or not self.op.use_locking:
10237
      # Expand instance names right here
10238
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10239
    else:
10240
      # Will use acquired locks
10241
      self.wanted_names = None
10242

    
10243
    if self.op.use_locking:
10244
      self.share_locks = _ShareAll()
10245

    
10246
      if self.wanted_names is None:
10247
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10248
      else:
10249
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10250

    
10251
      self.needed_locks[locking.LEVEL_NODE] = []
10252
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10253

    
10254
  def DeclareLocks(self, level):
10255
    if self.op.use_locking and level == locking.LEVEL_NODE:
10256
      self._LockInstancesNodes()
10257

    
10258
  def CheckPrereq(self):
10259
    """Check prerequisites.
10260

10261
    This only checks the optional instance list against the existing names.
10262

10263
    """
10264
    if self.wanted_names is None:
10265
      assert self.op.use_locking, "Locking was not used"
10266
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
10267

    
10268
    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
10269
                             for name in self.wanted_names]
10270

    
10271
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10272
    """Returns the status of a block device
10273

10274
    """
10275
    if self.op.static or not node:
10276
      return None
10277

    
10278
    self.cfg.SetDiskID(dev, node)
10279

    
10280
    result = self.rpc.call_blockdev_find(node, dev)
10281
    if result.offline:
10282
      return None
10283

    
10284
    result.Raise("Can't compute disk status for %s" % instance_name)
10285

    
10286
    status = result.payload
10287
    if status is None:
10288
      return None
10289

    
10290
    return (status.dev_path, status.major, status.minor,
10291
            status.sync_percent, status.estimated_time,
10292
            status.is_degraded, status.ldisk_status)
10293

    
10294
  def _ComputeDiskStatus(self, instance, snode, dev):
10295
    """Compute block device status.
10296

10297
    """
10298
    if dev.dev_type in constants.LDS_DRBD:
10299
      # we change the snode then (otherwise we use the one passed in)
10300
      if dev.logical_id[0] == instance.primary_node:
10301
        snode = dev.logical_id[1]
10302
      else:
10303
        snode = dev.logical_id[0]
10304

    
10305
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10306
                                              instance.name, dev)
10307
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10308

    
10309
    if dev.children:
10310
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10311
                                        instance, snode),
10312
                         dev.children)
10313
    else:
10314
      dev_children = []
10315

    
10316
    return {
10317
      "iv_name": dev.iv_name,
10318
      "dev_type": dev.dev_type,
10319
      "logical_id": dev.logical_id,
10320
      "physical_id": dev.physical_id,
10321
      "pstatus": dev_pstatus,
10322
      "sstatus": dev_sstatus,
10323
      "children": dev_children,
10324
      "mode": dev.mode,
10325
      "size": dev.size,
10326
      }
10327

    
10328
  def Exec(self, feedback_fn):
10329
    """Gather and return data"""
10330
    result = {}
10331

    
10332
    cluster = self.cfg.GetClusterInfo()
10333

    
10334
    for instance in self.wanted_instances:
10335
      pnode = self.cfg.GetNodeInfo(instance.primary_node)
10336

    
10337
      if self.op.static or pnode.offline:
10338
        remote_state = None
10339
        if pnode.offline:
10340
          self.LogWarning("Primary node %s is marked offline, returning static"
10341
                          " information only for instance %s" %
10342
                          (pnode.name, instance.name))
10343
      else:
10344
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10345
                                                  instance.name,
10346
                                                  instance.hypervisor)
10347
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10348
        remote_info = remote_info.payload
10349
        if remote_info and "state" in remote_info:
10350
          remote_state = "up"
10351
        else:
10352
          remote_state = "down"
10353

    
10354
      if instance.admin_up:
10355
        config_state = "up"
10356
      else:
10357
        config_state = "down"
10358

    
10359
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10360
                  instance.disks)
10361

    
10362
      result[instance.name] = {
10363
        "name": instance.name,
10364
        "config_state": config_state,
10365
        "run_state": remote_state,
10366
        "pnode": instance.primary_node,
10367
        "snodes": instance.secondary_nodes,
10368
        "os": instance.os,
10369
        # this happens to be the same format used for hooks
10370
        "nics": _NICListToTuple(self, instance.nics),
10371
        "disk_template": instance.disk_template,
10372
        "disks": disks,
10373
        "hypervisor": instance.hypervisor,
10374
        "network_port": instance.network_port,
10375
        "hv_instance": instance.hvparams,
10376
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10377
        "be_instance": instance.beparams,
10378
        "be_actual": cluster.FillBE(instance),
10379
        "os_instance": instance.osparams,
10380
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10381
        "serial_no": instance.serial_no,
10382
        "mtime": instance.mtime,
10383
        "ctime": instance.ctime,
10384
        "uuid": instance.uuid,
10385
        }
10386

    
10387
    return result


class LUInstanceSetParams(LogicalUnit):
10391
  """Modifies an instances's parameters.
10392

10393
  """
10394
  HPATH = "instance-modify"
10395
  HTYPE = constants.HTYPE_INSTANCE
10396
  REQ_BGL = False
10397

    
10398
  def CheckArguments(self):
10399
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10400
            self.op.hvparams or self.op.beparams or self.op.os_name):
10401
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10402

    
10403
    if self.op.hvparams:
10404
      _CheckGlobalHvParams(self.op.hvparams)
10405

    
10406
    # Disk validation
10407
    disk_addremove = 0
10408
    for disk_op, disk_dict in self.op.disks:
10409
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10410
      if disk_op == constants.DDM_REMOVE:
10411
        disk_addremove += 1
10412
        continue
10413
      elif disk_op == constants.DDM_ADD:
10414
        disk_addremove += 1
10415
      else:
10416
        if not isinstance(disk_op, int):
10417
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10418
        if not isinstance(disk_dict, dict):
10419
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10420
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10421

    
10422
      if disk_op == constants.DDM_ADD:
10423
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10424
        if mode not in constants.DISK_ACCESS_SET:
10425
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10426
                                     errors.ECODE_INVAL)
10427
        size = disk_dict.get(constants.IDISK_SIZE, None)
10428
        if size is None:
10429
          raise errors.OpPrereqError("Required disk parameter size missing",
10430
                                     errors.ECODE_INVAL)
10431
        try:
10432
          size = int(size)
10433
        except (TypeError, ValueError), err:
10434
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10435
                                     str(err), errors.ECODE_INVAL)
10436
        disk_dict[constants.IDISK_SIZE] = size
10437
      else:
10438
        # modification of disk
10439
        if constants.IDISK_SIZE in disk_dict:
10440
          raise errors.OpPrereqError("Disk size change not possible, use"
10441
                                     " grow-disk", errors.ECODE_INVAL)
10442

    
10443
    if disk_addremove > 1:
10444
      raise errors.OpPrereqError("Only one disk add or remove operation"
10445
                                 " supported at a time", errors.ECODE_INVAL)
10446

    
10447
    if self.op.disks and self.op.disk_template is not None:
10448
      raise errors.OpPrereqError("Disk template conversion and other disk"
10449
                                 " changes not supported at the same time",
10450
                                 errors.ECODE_INVAL)
10451

    
10452
    if (self.op.disk_template and
10453
        self.op.disk_template in constants.DTS_INT_MIRROR and
10454
        self.op.remote_node is None):
10455
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10456
                                 " one requires specifying a secondary node",
10457
                                 errors.ECODE_INVAL)
10458

    
10459
    # NIC validation
10460
    nic_addremove = 0
10461
    for nic_op, nic_dict in self.op.nics:
10462
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10463
      if nic_op == constants.DDM_REMOVE:
10464
        nic_addremove += 1
10465
        continue
10466
      elif nic_op == constants.DDM_ADD:
10467
        nic_addremove += 1
10468
      else:
10469
        if not isinstance(nic_op, int):
10470
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10471
        if not isinstance(nic_dict, dict):
10472
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10473
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10474

    
10475
      # nic_dict should be a dict
10476
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10477
      if nic_ip is not None:
10478
        if nic_ip.lower() == constants.VALUE_NONE:
10479
          nic_dict[constants.INIC_IP] = None
10480
        else:
10481
          if not netutils.IPAddress.IsValid(nic_ip):
10482
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10483
                                       errors.ECODE_INVAL)
10484

    
10485
      nic_bridge = nic_dict.get("bridge", None)
10486
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10487
      if nic_bridge and nic_link:
10488
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10489
                                   " at the same time", errors.ECODE_INVAL)
10490
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10491
        nic_dict["bridge"] = None
10492
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10493
        nic_dict[constants.INIC_LINK] = None
10494

    
10495
      if nic_op == constants.DDM_ADD:
10496
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10497
        if nic_mac is None:
10498
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10499

    
10500
      if constants.INIC_MAC in nic_dict:
10501
        nic_mac = nic_dict[constants.INIC_MAC]
10502
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10503
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10504

    
10505
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10506
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10507
                                     " modifying an existing nic",
10508
                                     errors.ECODE_INVAL)
10509

    
10510
    if nic_addremove > 1:
10511
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10512
                                 " supported at a time", errors.ECODE_INVAL)
10513

    
10514
  def ExpandNames(self):
10515
    self._ExpandAndLockInstance()
10516
    self.needed_locks[locking.LEVEL_NODE] = []
10517
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10518

    
10519
  def DeclareLocks(self, level):
10520
    if level == locking.LEVEL_NODE:
10521
      self._LockInstancesNodes()
10522
      if self.op.disk_template and self.op.remote_node:
10523
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10524
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10525

    
10526
  def BuildHooksEnv(self):
10527
    """Build hooks env.
10528

10529
    This runs on the master, primary and secondaries.
10530

10531
    """
10532
    args = dict()
10533
    if constants.BE_MEMORY in self.be_new:
10534
      args["memory"] = self.be_new[constants.BE_MEMORY]
10535
    if constants.BE_VCPUS in self.be_new:
10536
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10537
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10538
    # information at all.
10539
    if self.op.nics:
10540
      args["nics"] = []
10541
      nic_override = dict(self.op.nics)
10542
      for idx, nic in enumerate(self.instance.nics):
10543
        if idx in nic_override:
10544
          this_nic_override = nic_override[idx]
10545
        else:
10546
          this_nic_override = {}
10547
        if constants.INIC_IP in this_nic_override:
10548
          ip = this_nic_override[constants.INIC_IP]
10549
        else:
10550
          ip = nic.ip
10551
        if constants.INIC_MAC in this_nic_override:
10552
          mac = this_nic_override[constants.INIC_MAC]
10553
        else:
10554
          mac = nic.mac
10555
        if idx in self.nic_pnew:
10556
          nicparams = self.nic_pnew[idx]
10557
        else:
10558
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10559
        mode = nicparams[constants.NIC_MODE]
10560
        link = nicparams[constants.NIC_LINK]
10561
        args["nics"].append((ip, mac, mode, link))
10562
      if constants.DDM_ADD in nic_override:
10563
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10564
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10565
        nicparams = self.nic_pnew[constants.DDM_ADD]
10566
        mode = nicparams[constants.NIC_MODE]
10567
        link = nicparams[constants.NIC_LINK]
10568
        args["nics"].append((ip, mac, mode, link))
10569
      elif constants.DDM_REMOVE in nic_override:
10570
        del args["nics"][-1]
10571

    
10572
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10573
    if self.op.disk_template:
10574
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10575

    
10576
    return env
10577

    
10578
  def BuildHooksNodes(self):
10579
    """Build hooks nodes.
10580

10581
    """
10582
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10583
    return (nl, nl)
10584

    
10585
  def CheckPrereq(self):
10586
    """Check prerequisites.
10587

10588
    This only checks the instance list against the existing names.
10589

10590
    """
10591
    # checking the new params on the primary/secondary nodes
10592

    
10593
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10594
    cluster = self.cluster = self.cfg.GetClusterInfo()
10595
    assert self.instance is not None, \
10596
      "Cannot retrieve locked instance %s" % self.op.instance_name
10597
    pnode = instance.primary_node
10598
    nodelist = list(instance.all_nodes)
10599

    
10600
    # OS change
10601
    if self.op.os_name and not self.op.force:
10602
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10603
                      self.op.force_variant)
10604
      instance_os = self.op.os_name
10605
    else:
10606
      instance_os = instance.os
10607

    
10608
    if self.op.disk_template:
10609
      if instance.disk_template == self.op.disk_template:
10610
        raise errors.OpPrereqError("Instance already has disk template %s" %
10611
                                   instance.disk_template, errors.ECODE_INVAL)
10612

    
10613
      if (instance.disk_template,
10614
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10615
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10616
                                   " %s to %s" % (instance.disk_template,
10617
                                                  self.op.disk_template),
10618
                                   errors.ECODE_INVAL)
10619
      _CheckInstanceDown(self, instance, "cannot change disk template")
10620
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10621
        if self.op.remote_node == pnode:
10622
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10623
                                     " as the primary node of the instance" %
10624
                                     self.op.remote_node, errors.ECODE_STATE)
10625
        _CheckNodeOnline(self, self.op.remote_node)
10626
        _CheckNodeNotDrained(self, self.op.remote_node)
10627
        # FIXME: here we assume that the old instance type is DT_PLAIN
10628
        assert instance.disk_template == constants.DT_PLAIN
10629
        disks = [{constants.IDISK_SIZE: d.size,
10630
                  constants.IDISK_VG: d.logical_id[0]}
10631
                 for d in instance.disks]
10632
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10633
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10634

    
10635
    # hvparams processing
10636
    if self.op.hvparams:
10637
      hv_type = instance.hypervisor
10638
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10639
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10640
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10641

    
10642
      # local check
10643
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10644
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10645
      self.hv_new = hv_new # the new actual values
10646
      self.hv_inst = i_hvdict # the new dict (without defaults)
10647
    else:
10648
      self.hv_new = self.hv_inst = {}
10649

    
10650
    # beparams processing
10651
    if self.op.beparams:
10652
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10653
                                   use_none=True)
10654
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10655
      be_new = cluster.SimpleFillBE(i_bedict)
10656
      self.be_new = be_new # the new actual values
10657
      self.be_inst = i_bedict # the new dict (without defaults)
10658
    else:
10659
      self.be_new = self.be_inst = {}
10660
    be_old = cluster.FillBE(instance)
10661

    
10662
    # osparams processing
10663
    if self.op.osparams:
10664
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10665
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10666
      self.os_inst = i_osdict # the new dict (without defaults)
10667
    else:
10668
      self.os_inst = {}
10669

    
10670
    self.warn = []
10671

    
10672
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10673
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10674
      mem_check_list = [pnode]
10675
      if be_new[constants.BE_AUTO_BALANCE]:
10676
        # either we changed auto_balance to yes or it was from before
10677
        mem_check_list.extend(instance.secondary_nodes)
10678
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10679
                                                  instance.hypervisor)
10680
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10681
                                         instance.hypervisor)
10682
      pninfo = nodeinfo[pnode]
10683
      msg = pninfo.fail_msg
10684
      if msg:
10685
        # Assume the primary node is unreachable and go ahead
10686
        self.warn.append("Can't get info from primary node %s: %s" %
10687
                         (pnode, msg))
10688
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
10689
        self.warn.append("Node data from primary node %s doesn't contain"
10690
                         " free memory information" % pnode)
10691
      elif instance_info.fail_msg:
10692
        self.warn.append("Can't get instance runtime information: %s" %
10693
                        instance_info.fail_msg)
10694
      else:
10695
        if instance_info.payload:
10696
          current_mem = int(instance_info.payload["memory"])
10697
        else:
10698
          # Assume instance not running
10699
          # (there is a slight race condition here, but it's not very probable,
10700
          # and we have no other way to check)
10701
          current_mem = 0
10702
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10703
                    pninfo.payload["memory_free"])
10704
        if miss_mem > 0:
10705
          raise errors.OpPrereqError("This change will prevent the instance"
10706
                                     " from starting, due to %d MB of memory"
10707
                                     " missing on its primary node" % miss_mem,
10708
                                     errors.ECODE_NORES)
10709

    
10710
      if be_new[constants.BE_AUTO_BALANCE]:
10711
        for node, nres in nodeinfo.items():
10712
          if node not in instance.secondary_nodes:
10713
            continue
10714
          nres.Raise("Can't get info from secondary node %s" % node,
10715
                     prereq=True, ecode=errors.ECODE_STATE)
10716
          if not isinstance(nres.payload.get("memory_free", None), int):
10717
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10718
                                       " memory information" % node,
10719
                                       errors.ECODE_STATE)
10720
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10721
            raise errors.OpPrereqError("This change will prevent the instance"
10722
                                       " from failover to its secondary node"
10723
                                       " %s, due to not enough memory" % node,
10724
                                       errors.ECODE_STATE)
10725

    
10726
    # NIC processing
10727
    self.nic_pnew = {}
10728
    self.nic_pinst = {}
10729
    for nic_op, nic_dict in self.op.nics:
10730
      if nic_op == constants.DDM_REMOVE:
10731
        if not instance.nics:
10732
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10733
                                     errors.ECODE_INVAL)
10734
        continue
10735
      if nic_op != constants.DDM_ADD:
10736
        # an existing nic
10737
        if not instance.nics:
10738
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10739
                                     " no NICs" % nic_op,
10740
                                     errors.ECODE_INVAL)
10741
        if nic_op < 0 or nic_op >= len(instance.nics):
10742
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10743
                                     " are 0 to %d" %
10744
                                     (nic_op, len(instance.nics) - 1),
10745
                                     errors.ECODE_INVAL)
10746
        old_nic_params = instance.nics[nic_op].nicparams
10747
        old_nic_ip = instance.nics[nic_op].ip
10748
      else:
10749
        old_nic_params = {}
10750
        old_nic_ip = None
10751

    
10752
      update_params_dict = dict([(key, nic_dict[key])
10753
                                 for key in constants.NICS_PARAMETERS
10754
                                 if key in nic_dict])
10755

    
10756
      if "bridge" in nic_dict:
10757
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10758

    
10759
      new_nic_params = _GetUpdatedParams(old_nic_params,
10760
                                         update_params_dict)
10761
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10762
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10763
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10764
      self.nic_pinst[nic_op] = new_nic_params
10765
      self.nic_pnew[nic_op] = new_filled_nic_params
10766
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10767

    
10768
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
10769
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10770
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10771
        if msg:
10772
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10773
          if self.op.force:
10774
            self.warn.append(msg)
10775
          else:
10776
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10777
      if new_nic_mode == constants.NIC_MODE_ROUTED:
10778
        if constants.INIC_IP in nic_dict:
10779
          nic_ip = nic_dict[constants.INIC_IP]
10780
        else:
10781
          nic_ip = old_nic_ip
10782
        if nic_ip is None:
10783
          raise errors.OpPrereqError("Cannot set the nic ip to None"
10784
                                     " on a routed nic", errors.ECODE_INVAL)
10785
      if constants.INIC_MAC in nic_dict:
10786
        nic_mac = nic_dict[constants.INIC_MAC]
10787
        if nic_mac is None:
10788
          raise errors.OpPrereqError("Cannot set the nic mac to None",
10789
                                     errors.ECODE_INVAL)
10790
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10791
          # otherwise generate the mac
10792
          nic_dict[constants.INIC_MAC] = \
10793
            self.cfg.GenerateMAC(self.proc.GetECId())
10794
        else:
10795
          # or validate/reserve the current one
10796
          try:
10797
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10798
          except errors.ReservationError:
10799
            raise errors.OpPrereqError("MAC address %s already in use"
10800
                                       " in cluster" % nic_mac,
10801
                                       errors.ECODE_NOTUNIQUE)
10802

    
10803
    # DISK processing
10804
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10805
      raise errors.OpPrereqError("Disk operations not supported for"
10806
                                 " diskless instances",
10807
                                 errors.ECODE_INVAL)
10808
    for disk_op, _ in self.op.disks:
10809
      if disk_op == constants.DDM_REMOVE:
10810
        if len(instance.disks) == 1:
10811
          raise errors.OpPrereqError("Cannot remove the last disk of"
10812
                                     " an instance", errors.ECODE_INVAL)
10813
        _CheckInstanceDown(self, instance, "cannot remove disks")
10814

    
10815
      if (disk_op == constants.DDM_ADD and
10816
          len(instance.disks) >= constants.MAX_DISKS):
10817
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10818
                                   " add more" % constants.MAX_DISKS,
10819
                                   errors.ECODE_STATE)
10820
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10821
        # an existing disk
10822
        if disk_op < 0 or disk_op >= len(instance.disks):
10823
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
10824
                                     " are 0 to %d" %
10825
                                     (disk_op, len(instance.disks)),
10826
                                     errors.ECODE_INVAL)
10827

    
10828
    return
10829

    
10830
  def _ConvertPlainToDrbd(self, feedback_fn):
10831
    """Converts an instance from plain to drbd.
10832

10833
    """
10834
    feedback_fn("Converting template to drbd")
10835
    instance = self.instance
10836
    pnode = instance.primary_node
10837
    snode = self.op.remote_node
10838

    
10839
    # create a fake disk info for _GenerateDiskTemplate
10840
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10841
                  constants.IDISK_VG: d.logical_id[0]}
10842
                 for d in instance.disks]
10843
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10844
                                      instance.name, pnode, [snode],
10845
                                      disk_info, None, None, 0, feedback_fn)
10846
    info = _GetInstanceInfoText(instance)
10847
    feedback_fn("Creating aditional volumes...")
10848
    # first, create the missing data and meta devices
10849
    for disk in new_disks:
10850
      # unfortunately this is... not too nice
10851
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10852
                            info, True)
10853
      for child in disk.children:
10854
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
10855
    # at this stage, all new LVs have been created, we can rename the
10856
    # old ones
10857
    feedback_fn("Renaming original volumes...")
10858
    rename_list = [(o, n.children[0].logical_id)
10859
                   for (o, n) in zip(instance.disks, new_disks)]
10860
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
10861
    result.Raise("Failed to rename original LVs")
10862

    
10863
    feedback_fn("Initializing DRBD devices...")
10864
    # all child devices are in place, we can now create the DRBD devices
10865
    for disk in new_disks:
10866
      for node in [pnode, snode]:
10867
        f_create = node == pnode
10868
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10869

    
10870
    # at this point, the instance has been modified
10871
    instance.disk_template = constants.DT_DRBD8
10872
    instance.disks = new_disks
10873
    self.cfg.Update(instance, feedback_fn)
10874

    
10875
    # disks are created, waiting for sync
10876
    disk_abort = not _WaitForSync(self, instance,
10877
                                  oneshot=not self.op.wait_for_sync)
10878
    if disk_abort:
10879
      raise errors.OpExecError("There are some degraded disks for"
10880
                               " this instance, please cleanup manually")
10881

    
10882
  def _ConvertDrbdToPlain(self, feedback_fn):
10883
    """Converts an instance from drbd to plain.
10884

10885
    """
10886
    instance = self.instance
10887
    assert len(instance.secondary_nodes) == 1
10888
    pnode = instance.primary_node
10889
    snode = instance.secondary_nodes[0]
10890
    feedback_fn("Converting template to plain")
10891

    
10892
    old_disks = instance.disks
10893
    new_disks = [d.children[0] for d in old_disks]
10894

    
10895
    # copy over size and mode
10896
    for parent, child in zip(old_disks, new_disks):
10897
      child.size = parent.size
10898
      child.mode = parent.mode
10899

    
10900
    # update instance structure
10901
    instance.disks = new_disks
10902
    instance.disk_template = constants.DT_PLAIN
10903
    self.cfg.Update(instance, feedback_fn)
10904

    
10905
    feedback_fn("Removing volumes on the secondary node...")
10906
    for disk in old_disks:
10907
      self.cfg.SetDiskID(disk, snode)
10908
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10909
      if msg:
10910
        self.LogWarning("Could not remove block device %s on node %s,"
10911
                        " continuing anyway: %s", disk.iv_name, snode, msg)
10912

    
10913
    feedback_fn("Removing unneeded volumes on the primary node...")
10914
    for idx, disk in enumerate(old_disks):
10915
      meta = disk.children[1]
10916
      self.cfg.SetDiskID(meta, pnode)
10917
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10918
      if msg:
10919
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
10920
                        " continuing anyway: %s", idx, pnode, msg)
10921

    
10922
  def Exec(self, feedback_fn):
10923
    """Modifies an instance.
10924

10925
    All parameters take effect only at the next restart of the instance.
10926

10927
    """
10928
    # Process here the warnings from CheckPrereq, as we don't have a
10929
    # feedback_fn there.
10930
    for warn in self.warn:
10931
      feedback_fn("WARNING: %s" % warn)
10932

    
10933
    result = []
10934
    instance = self.instance
10935
    # disk changes
10936
    for disk_op, disk_dict in self.op.disks:
10937
      if disk_op == constants.DDM_REMOVE:
10938
        # remove the last disk
10939
        device = instance.disks.pop()
10940
        device_idx = len(instance.disks)
10941
        for node, disk in device.ComputeNodeTree(instance.primary_node):
10942
          self.cfg.SetDiskID(disk, node)
10943
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10944
          if msg:
10945
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
10946
                            " continuing anyway", device_idx, node, msg)
10947
        result.append(("disk/%d" % device_idx, "remove"))
10948
      elif disk_op == constants.DDM_ADD:
10949
        # add a new disk
10950
        if instance.disk_template in (constants.DT_FILE,
10951
                                        constants.DT_SHARED_FILE):
10952
          file_driver, file_path = instance.disks[0].logical_id
10953
          file_path = os.path.dirname(file_path)
10954
        else:
10955
          file_driver = file_path = None
10956
        disk_idx_base = len(instance.disks)
10957
        new_disk = _GenerateDiskTemplate(self,
10958
                                         instance.disk_template,
10959
                                         instance.name, instance.primary_node,
10960
                                         instance.secondary_nodes,
10961
                                         [disk_dict],
10962
                                         file_path,
10963
                                         file_driver,
10964
                                         disk_idx_base, feedback_fn)[0]
10965
        instance.disks.append(new_disk)
10966
        info = _GetInstanceInfoText(instance)
10967

    
10968
        logging.info("Creating volume %s for instance %s",
10969
                     new_disk.iv_name, instance.name)
10970
        # Note: this needs to be kept in sync with _CreateDisks
10971
        #HARDCODE
10972
        for node in instance.all_nodes:
10973
          f_create = node == instance.primary_node
10974
          try:
10975
            _CreateBlockDev(self, node, instance, new_disk,
10976
                            f_create, info, f_create)
10977
          except errors.OpExecError, err:
10978
            self.LogWarning("Failed to create volume %s (%s) on"
10979
                            " node %s: %s",
10980
                            new_disk.iv_name, new_disk, node, err)
10981
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10982
                       (new_disk.size, new_disk.mode)))
10983
      else:
10984
        # change a given disk
10985
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10986
        result.append(("disk.mode/%d" % disk_op,
10987
                       disk_dict[constants.IDISK_MODE]))
10988

    
10989
    if self.op.disk_template:
10990
      r_shut = _ShutdownInstanceDisks(self, instance)
10991
      if not r_shut:
10992
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10993
                                 " proceed with disk template conversion")
10994
      mode = (instance.disk_template, self.op.disk_template)
10995
      try:
10996
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
10997
      except:
10998
        self.cfg.ReleaseDRBDMinors(instance.name)
10999
        raise
11000
      result.append(("disk_template", self.op.disk_template))
11001

    
11002
    # NIC changes
11003
    for nic_op, nic_dict in self.op.nics:
11004
      if nic_op == constants.DDM_REMOVE:
11005
        # remove the last nic
11006
        del instance.nics[-1]
11007
        result.append(("nic.%d" % len(instance.nics), "remove"))
11008
      elif nic_op == constants.DDM_ADD:
11009
        # mac and bridge should be set, by now
11010
        mac = nic_dict[constants.INIC_MAC]
11011
        ip = nic_dict.get(constants.INIC_IP, None)
11012
        nicparams = self.nic_pinst[constants.DDM_ADD]
11013
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11014
        instance.nics.append(new_nic)
11015
        result.append(("nic.%d" % (len(instance.nics) - 1),
11016
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11017
                       (new_nic.mac, new_nic.ip,
11018
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11019
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11020
                       )))
11021
      else:
11022
        for key in (constants.INIC_MAC, constants.INIC_IP):
11023
          if key in nic_dict:
11024
            setattr(instance.nics[nic_op], key, nic_dict[key])
11025
        if nic_op in self.nic_pinst:
11026
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11027
        for key, val in nic_dict.iteritems():
11028
          result.append(("nic.%s/%d" % (key, nic_op), val))
11029

    
11030
    # hvparams changes
11031
    if self.op.hvparams:
11032
      instance.hvparams = self.hv_inst
11033
      for key, val in self.op.hvparams.iteritems():
11034
        result.append(("hv/%s" % key, val))
11035

    
11036
    # beparams changes
11037
    if self.op.beparams:
11038
      instance.beparams = self.be_inst
11039
      for key, val in self.op.beparams.iteritems():
11040
        result.append(("be/%s" % key, val))
11041

    
11042
    # OS change
11043
    if self.op.os_name:
11044
      instance.os = self.op.os_name
11045

    
11046
    # osparams changes
11047
    if self.op.osparams:
11048
      instance.osparams = self.os_inst
11049
      for key, val in self.op.osparams.iteritems():
11050
        result.append(("os/%s" % key, val))
11051

    
11052
    self.cfg.Update(instance, feedback_fn)
11053

    
11054
    return result
11055

    
11056
  _DISK_CONVERSIONS = {
11057
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11058
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11059
    }
11060

    
11061

    
11062
class LUBackupQuery(NoHooksLU):
11063
  """Query the exports list
11064

11065
  """
11066
  REQ_BGL = False
11067

    
11068
  def ExpandNames(self):
11069
    self.needed_locks = {}
11070
    self.share_locks[locking.LEVEL_NODE] = 1
11071
    if not self.op.nodes:
11072
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11073
    else:
11074
      self.needed_locks[locking.LEVEL_NODE] = \
11075
        _GetWantedNodes(self, self.op.nodes)
11076

    
11077
  def Exec(self, feedback_fn):
11078
    """Compute the list of all the exported system images.
11079

11080
    @rtype: dict
11081
    @return: a dictionary with the structure node->(export-list)
11082
        where export-list is a list of the instances exported on
11083
        that node.
11084

11085
    """
11086
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
11087
    rpcresult = self.rpc.call_export_list(self.nodes)
11088
    result = {}
11089
    for node in rpcresult:
11090
      if rpcresult[node].fail_msg:
11091
        result[node] = False
11092
      else:
11093
        result[node] = rpcresult[node].payload
11094

    
11095
    return result
11096

    
11097

    
11098
class LUBackupPrepare(NoHooksLU):
11099
  """Prepares an instance for an export and returns useful information.
11100

11101
  """
11102
  REQ_BGL = False
11103

    
11104
  def ExpandNames(self):
11105
    self._ExpandAndLockInstance()
11106

    
11107
  def CheckPrereq(self):
11108
    """Check prerequisites.
11109

11110
    """
11111
    instance_name = self.op.instance_name
11112

    
11113
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11114
    assert self.instance is not None, \
11115
          "Cannot retrieve locked instance %s" % self.op.instance_name
11116
    _CheckNodeOnline(self, self.instance.primary_node)
11117

    
11118
    self._cds = _GetClusterDomainSecret()
11119

    
11120
  def Exec(self, feedback_fn):
11121
    """Prepares an instance for an export.
11122

11123
    """
11124
    instance = self.instance
11125

    
11126
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11127
      salt = utils.GenerateSecret(8)
11128

    
11129
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11130
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11131
                                              constants.RIE_CERT_VALIDITY)
11132
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11133

    
11134
      (name, cert_pem) = result.payload
11135

    
11136
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11137
                                             cert_pem)
11138

    
11139
      return {
11140
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11141
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11142
                          salt),
11143
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11144
        }
11145

    
11146
    return None
11147

    
11148

    
11149
class LUBackupExport(LogicalUnit):
11150
  """Export an instance to an image in the cluster.
11151

11152
  """
11153
  HPATH = "instance-export"
11154
  HTYPE = constants.HTYPE_INSTANCE
11155
  REQ_BGL = False
11156

    
11157
  def CheckArguments(self):
11158
    """Check the arguments.
11159

11160
    """
11161
    self.x509_key_name = self.op.x509_key_name
11162
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11163

    
11164
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11165
      if not self.x509_key_name:
11166
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11167
                                   errors.ECODE_INVAL)
11168

    
11169
      if not self.dest_x509_ca_pem:
11170
        raise errors.OpPrereqError("Missing destination X509 CA",
11171
                                   errors.ECODE_INVAL)
11172

    
11173
  def ExpandNames(self):
11174
    self._ExpandAndLockInstance()
11175

    
11176
    # Lock all nodes for local exports
11177
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11178
      # FIXME: lock only instance primary and destination node
11179
      #
11180
      # Sad but true, for now we have do lock all nodes, as we don't know where
11181
      # the previous export might be, and in this LU we search for it and
11182
      # remove it from its current node. In the future we could fix this by:
11183
      #  - making a tasklet to search (share-lock all), then create the
11184
      #    new one, then one to remove, after
11185
      #  - removing the removal operation altogether
11186
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11187

    
11188
  def DeclareLocks(self, level):
11189
    """Last minute lock declaration."""
11190
    # All nodes are locked anyway, so nothing to do here.
11191

    
11192
  def BuildHooksEnv(self):
11193
    """Build hooks env.
11194

11195
    This will run on the master, primary node and target node.
11196

11197
    """
11198
    env = {
11199
      "EXPORT_MODE": self.op.mode,
11200
      "EXPORT_NODE": self.op.target_node,
11201
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11202
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11203
      # TODO: Generic function for boolean env variables
11204
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11205
      }
11206

    
11207
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11208

    
11209
    return env
11210

    
11211
  def BuildHooksNodes(self):
11212
    """Build hooks nodes.
11213

11214
    """
11215
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11216

    
11217
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11218
      nl.append(self.op.target_node)
11219

    
11220
    return (nl, nl)
11221

    
11222
  def CheckPrereq(self):
11223
    """Check prerequisites.
11224

11225
    This checks that the instance and node names are valid.
11226

11227
    """
11228
    instance_name = self.op.instance_name
11229

    
11230
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11231
    assert self.instance is not None, \
11232
          "Cannot retrieve locked instance %s" % self.op.instance_name
11233
    _CheckNodeOnline(self, self.instance.primary_node)
11234

    
11235
    if (self.op.remove_instance and self.instance.admin_up and
11236
        not self.op.shutdown):
11237
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11238
                                 " down before")
11239

    
11240
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11241
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11242
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11243
      assert self.dst_node is not None
11244

    
11245
      _CheckNodeOnline(self, self.dst_node.name)
11246
      _CheckNodeNotDrained(self, self.dst_node.name)
11247

    
11248
      self._cds = None
11249
      self.dest_disk_info = None
11250
      self.dest_x509_ca = None
11251

    
11252
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11253
      self.dst_node = None
11254

    
11255
      if len(self.op.target_node) != len(self.instance.disks):
11256
        raise errors.OpPrereqError(("Received destination information for %s"
11257
                                    " disks, but instance %s has %s disks") %
11258
                                   (len(self.op.target_node), instance_name,
11259
                                    len(self.instance.disks)),
11260
                                   errors.ECODE_INVAL)
11261

    
11262
      cds = _GetClusterDomainSecret()
11263

    
11264
      # Check X509 key name
11265
      try:
11266
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11267
      except (TypeError, ValueError), err:
11268
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11269

    
11270
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11271
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11272
                                   errors.ECODE_INVAL)
11273

    
11274
      # Load and verify CA
11275
      try:
11276
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11277
      except OpenSSL.crypto.Error, err:
11278
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11279
                                   (err, ), errors.ECODE_INVAL)
11280

    
11281
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11282
      if errcode is not None:
11283
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11284
                                   (msg, ), errors.ECODE_INVAL)
11285

    
11286
      self.dest_x509_ca = cert
11287

    
11288
      # Verify target information
11289
      disk_info = []
11290
      for idx, disk_data in enumerate(self.op.target_node):
11291
        try:
11292
          (host, port, magic) = \
11293
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11294
        except errors.GenericError, err:
11295
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11296
                                     (idx, err), errors.ECODE_INVAL)
11297

    
11298
        disk_info.append((host, port, magic))
11299

    
11300
      assert len(disk_info) == len(self.op.target_node)
11301
      self.dest_disk_info = disk_info
11302

    
11303
    else:
11304
      raise errors.ProgrammerError("Unhandled export mode %r" %
11305
                                   self.op.mode)
11306

    
11307
    # instance disk type verification
11308
    # TODO: Implement export support for file-based disks
11309
    for disk in self.instance.disks:
11310
      if disk.dev_type == constants.LD_FILE:
11311
        raise errors.OpPrereqError("Export not supported for instances with"
11312
                                   " file-based disks", errors.ECODE_INVAL)
11313

    
11314
  def _CleanupExports(self, feedback_fn):
11315
    """Removes exports of current instance from all other nodes.
11316

11317
    If an instance in a cluster with nodes A..D was exported to node C, its
11318
    exports will be removed from the nodes A, B and D.
11319

11320
    """
11321
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11322

    
11323
    nodelist = self.cfg.GetNodeList()
11324
    nodelist.remove(self.dst_node.name)
11325

    
11326
    # on one-node clusters nodelist will be empty after the removal
11327
    # if we proceed the backup would be removed because OpBackupQuery
11328
    # substitutes an empty list with the full cluster node list.
11329
    iname = self.instance.name
11330
    if nodelist:
11331
      feedback_fn("Removing old exports for instance %s" % iname)
11332
      exportlist = self.rpc.call_export_list(nodelist)
11333
      for node in exportlist:
11334
        if exportlist[node].fail_msg:
11335
          continue
11336
        if iname in exportlist[node].payload:
11337
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11338
          if msg:
11339
            self.LogWarning("Could not remove older export for instance %s"
11340
                            " on node %s: %s", iname, node, msg)
11341

    
11342
  def Exec(self, feedback_fn):
11343
    """Export an instance to an image in the cluster.
11344

11345
    """
11346
    assert self.op.mode in constants.EXPORT_MODES
11347

    
11348
    instance = self.instance
11349
    src_node = instance.primary_node
11350

    
11351
    if self.op.shutdown:
11352
      # shutdown the instance, but not the disks
11353
      feedback_fn("Shutting down instance %s" % instance.name)
11354
      result = self.rpc.call_instance_shutdown(src_node, instance,
11355
                                               self.op.shutdown_timeout)
11356
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11357
      result.Raise("Could not shutdown instance %s on"
11358
                   " node %s" % (instance.name, src_node))
11359

    
11360
    # set the disks ID correctly since call_instance_start needs the
11361
    # correct drbd minor to create the symlinks
11362
    for disk in instance.disks:
11363
      self.cfg.SetDiskID(disk, src_node)
11364

    
11365
    activate_disks = (not instance.admin_up)
11366

    
11367
    if activate_disks:
11368
      # Activate the instance disks if we'exporting a stopped instance
11369
      feedback_fn("Activating disks for %s" % instance.name)
11370
      _StartInstanceDisks(self, instance, None)
11371

    
11372
    try:
11373
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11374
                                                     instance)
11375

    
11376
      helper.CreateSnapshots()
11377
      try:
11378
        if (self.op.shutdown and instance.admin_up and
11379
            not self.op.remove_instance):
11380
          assert not activate_disks
11381
          feedback_fn("Starting instance %s" % instance.name)
11382
          result = self.rpc.call_instance_start(src_node, instance,
11383
                                                None, None, False)
11384
          msg = result.fail_msg
11385
          if msg:
11386
            feedback_fn("Failed to start instance: %s" % msg)
11387
            _ShutdownInstanceDisks(self, instance)
11388
            raise errors.OpExecError("Could not start instance: %s" % msg)
11389

    
11390
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11391
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11392
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11393
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11394
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11395

    
11396
          (key_name, _, _) = self.x509_key_name
11397

    
11398
          dest_ca_pem = \
11399
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11400
                                            self.dest_x509_ca)
11401

    
11402
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11403
                                                     key_name, dest_ca_pem,
11404
                                                     timeouts)
11405
      finally:
11406
        helper.Cleanup()
11407

    
11408
      # Check for backwards compatibility
11409
      assert len(dresults) == len(instance.disks)
11410
      assert compat.all(isinstance(i, bool) for i in dresults), \
11411
             "Not all results are boolean: %r" % dresults
11412

    
11413
    finally:
11414
      if activate_disks:
11415
        feedback_fn("Deactivating disks for %s" % instance.name)
11416
        _ShutdownInstanceDisks(self, instance)
11417

    
11418
    if not (compat.all(dresults) and fin_resu):
11419
      failures = []
11420
      if not fin_resu:
11421
        failures.append("export finalization")
11422
      if not compat.all(dresults):
11423
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11424
                               if not dsk)
11425
        failures.append("disk export: disk(s) %s" % fdsk)
11426

    
11427
      raise errors.OpExecError("Export failed, errors in %s" %
11428
                               utils.CommaJoin(failures))
11429

    
11430
    # At this point, the export was successful, we can cleanup/finish
11431

    
11432
    # Remove instance if requested
11433
    if self.op.remove_instance:
11434
      feedback_fn("Removing instance %s" % instance.name)
11435
      _RemoveInstance(self, feedback_fn, instance,
11436
                      self.op.ignore_remove_failures)
11437

    
11438
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11439
      self._CleanupExports(feedback_fn)
11440

    
11441
    return fin_resu, dresults
11442

    
11443

    
11444
class LUBackupRemove(NoHooksLU):
11445
  """Remove exports related to the named instance.
11446

11447
  """
11448
  REQ_BGL = False
11449

    
11450
  def ExpandNames(self):
11451
    self.needed_locks = {}
11452
    # We need all nodes to be locked in order for RemoveExport to work, but we
11453
    # don't need to lock the instance itself, as nothing will happen to it (and
11454
    # we can remove exports also for a removed instance)
11455
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11456

    
11457
  def Exec(self, feedback_fn):
11458
    """Remove any export.
11459

11460
    """
11461
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11462
    # If the instance was not found we'll try with the name that was passed in.
11463
    # This will only work if it was an FQDN, though.
11464
    fqdn_warn = False
11465
    if not instance_name:
11466
      fqdn_warn = True
11467
      instance_name = self.op.instance_name
11468

    
11469
    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
11470
    exportlist = self.rpc.call_export_list(locked_nodes)
11471
    found = False
11472
    for node in exportlist:
11473
      msg = exportlist[node].fail_msg
11474
      if msg:
11475
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11476
        continue
11477
      if instance_name in exportlist[node].payload:
11478
        found = True
11479
        result = self.rpc.call_export_remove(node, instance_name)
11480
        msg = result.fail_msg
11481
        if msg:
11482
          logging.error("Could not remove export for instance %s"
11483
                        " on node %s: %s", instance_name, node, msg)
11484

    
11485
    if fqdn_warn and not found:
11486
      feedback_fn("Export not found. If trying to remove an export belonging"
11487
                  " to a deleted instance please use its Fully Qualified"
11488
                  " Domain Name.")
11489

    
11490

    
11491
class LUGroupAdd(LogicalUnit):
11492
  """Logical unit for creating node groups.
11493

11494
  """
11495
  HPATH = "group-add"
11496
  HTYPE = constants.HTYPE_GROUP
11497
  REQ_BGL = False
11498

    
11499
  def ExpandNames(self):
11500
    # We need the new group's UUID here so that we can create and acquire the
11501
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11502
    # that it should not check whether the UUID exists in the configuration.
11503
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11504
    self.needed_locks = {}
11505
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11506

    
11507
  def CheckPrereq(self):
11508
    """Check prerequisites.
11509

11510
    This checks that the given group name is not an existing node group
11511
    already.
11512

11513
    """
11514
    try:
11515
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11516
    except errors.OpPrereqError:
11517
      pass
11518
    else:
11519
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11520
                                 " node group (UUID: %s)" %
11521
                                 (self.op.group_name, existing_uuid),
11522
                                 errors.ECODE_EXISTS)
11523

    
11524
    if self.op.ndparams:
11525
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11526

    
11527
  def BuildHooksEnv(self):
11528
    """Build hooks env.
11529

11530
    """
11531
    return {
11532
      "GROUP_NAME": self.op.group_name,
11533
      }
11534

    
11535
  def BuildHooksNodes(self):
11536
    """Build hooks nodes.
11537

11538
    """
11539
    mn = self.cfg.GetMasterNode()
11540
    return ([mn], [mn])
11541

    
11542
  def Exec(self, feedback_fn):
11543
    """Add the node group to the cluster.
11544

11545
    """
11546
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11547
                                  uuid=self.group_uuid,
11548
                                  alloc_policy=self.op.alloc_policy,
11549
                                  ndparams=self.op.ndparams)
11550

    
11551
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11552
    del self.remove_locks[locking.LEVEL_NODEGROUP]
11553

    
11554

    
11555
class LUGroupAssignNodes(NoHooksLU):
11556
  """Logical unit for assigning nodes to groups.
11557

11558
  """
11559
  REQ_BGL = False
11560

    
11561
  def ExpandNames(self):
11562
    # These raise errors.OpPrereqError on their own:
11563
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11564
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11565

    
11566
    # We want to lock all the affected nodes and groups. We have readily
11567
    # available the list of nodes, and the *destination* group. To gather the
11568
    # list of "source" groups, we need to fetch node information later on.
11569
    self.needed_locks = {
11570
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11571
      locking.LEVEL_NODE: self.op.nodes,
11572
      }
11573

    
11574
  def DeclareLocks(self, level):
11575
    if level == locking.LEVEL_NODEGROUP:
11576
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11577

    
11578
      # Try to get all affected nodes' groups without having the group or node
11579
      # lock yet. Needs verification later in the code flow.
11580
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11581

    
11582
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11583

    
11584
  def CheckPrereq(self):
11585
    """Check prerequisites.
11586

11587
    """
11588
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11589
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11590
            frozenset(self.op.nodes))
11591

    
11592
    expected_locks = (set([self.group_uuid]) |
11593
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11594
    actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11595
    if actual_locks != expected_locks:
11596
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11597
                               " current groups are '%s', used to be '%s'" %
11598
                               (utils.CommaJoin(expected_locks),
11599
                                utils.CommaJoin(actual_locks)))
11600

    
11601
    self.node_data = self.cfg.GetAllNodesInfo()
11602
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11603
    instance_data = self.cfg.GetAllInstancesInfo()
11604

    
11605
    if self.group is None:
11606
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11607
                               (self.op.group_name, self.group_uuid))
11608

    
11609
    (new_splits, previous_splits) = \
11610
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11611
                                             for node in self.op.nodes],
11612
                                            self.node_data, instance_data)
11613

    
11614
    if new_splits:
11615
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11616

    
11617
      if not self.op.force:
11618
        raise errors.OpExecError("The following instances get split by this"
11619
                                 " change and --force was not given: %s" %
11620
                                 fmt_new_splits)
11621
      else:
11622
        self.LogWarning("This operation will split the following instances: %s",
11623
                        fmt_new_splits)
11624

    
11625
        if previous_splits:
11626
          self.LogWarning("In addition, these already-split instances continue"
11627
                          " to be split across groups: %s",
11628
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
11629

    
11630
  def Exec(self, feedback_fn):
11631
    """Assign nodes to a new group.
11632

11633
    """
11634
    for node in self.op.nodes:
11635
      self.node_data[node].group = self.group_uuid
11636

    
11637
    # FIXME: Depends on side-effects of modifying the result of
11638
    # C{cfg.GetAllNodesInfo}
11639

    
11640
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11641

    
11642
  @staticmethod
11643
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11644
    """Check for split instances after a node assignment.
11645

11646
    This method considers a series of node assignments as an atomic operation,
11647
    and returns information about split instances after applying the set of
11648
    changes.
11649

11650
    In particular, it returns information about newly split instances, and
11651
    instances that were already split, and remain so after the change.
11652

11653
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11654
    considered.
11655

11656
    @type changes: list of (node_name, new_group_uuid) pairs.
11657
    @param changes: list of node assignments to consider.
11658
    @param node_data: a dict with data for all nodes
11659
    @param instance_data: a dict with all instances to consider
11660
    @rtype: a two-tuple
11661
    @return: a list of instances that were previously okay and result split as a
11662
      consequence of this change, and a list of instances that were previously
11663
      split and this change does not fix.
11664

11665
    """
11666
    changed_nodes = dict((node, group) for node, group in changes
11667
                         if node_data[node].group != group)
11668

    
11669
    all_split_instances = set()
11670
    previously_split_instances = set()
11671

    
11672
    def InstanceNodes(instance):
11673
      return [instance.primary_node] + list(instance.secondary_nodes)
11674

    
11675
    for inst in instance_data.values():
11676
      if inst.disk_template not in constants.DTS_INT_MIRROR:
11677
        continue
11678

    
11679
      instance_nodes = InstanceNodes(inst)
11680

    
11681
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
11682
        previously_split_instances.add(inst.name)
11683

    
11684
      if len(set(changed_nodes.get(node, node_data[node].group)
11685
                 for node in instance_nodes)) > 1:
11686
        all_split_instances.add(inst.name)
11687

    
11688
    return (list(all_split_instances - previously_split_instances),
11689
            list(previously_split_instances & all_split_instances))
11690

    
11691

    
11692
class _GroupQuery(_QueryBase):
11693
  FIELDS = query.GROUP_FIELDS
11694

    
11695
  def ExpandNames(self, lu):
11696
    lu.needed_locks = {}
11697

    
11698
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11699
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11700

    
11701
    if not self.names:
11702
      self.wanted = [name_to_uuid[name]
11703
                     for name in utils.NiceSort(name_to_uuid.keys())]
11704
    else:
11705
      # Accept names to be either names or UUIDs.
11706
      missing = []
11707
      self.wanted = []
11708
      all_uuid = frozenset(self._all_groups.keys())
11709

    
11710
      for name in self.names:
11711
        if name in all_uuid:
11712
          self.wanted.append(name)
11713
        elif name in name_to_uuid:
11714
          self.wanted.append(name_to_uuid[name])
11715
        else:
11716
          missing.append(name)
11717

    
11718
      if missing:
11719
        raise errors.OpPrereqError("Some groups do not exist: %s" %
11720
                                   utils.CommaJoin(missing),
11721
                                   errors.ECODE_NOENT)
11722

    
11723
  def DeclareLocks(self, lu, level):
11724
    pass
11725

    
11726
  def _GetQueryData(self, lu):
11727
    """Computes the list of node groups and their attributes.
11728

11729
    """
11730
    do_nodes = query.GQ_NODE in self.requested_data
11731
    do_instances = query.GQ_INST in self.requested_data
11732

    
11733
    group_to_nodes = None
11734
    group_to_instances = None
11735

    
11736
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11737
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11738
    # latter GetAllInstancesInfo() is not enough, for we have to go through
11739
    # instance->node. Hence, we will need to process nodes even if we only need
11740
    # instance information.
11741
    if do_nodes or do_instances:
11742
      all_nodes = lu.cfg.GetAllNodesInfo()
11743
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11744
      node_to_group = {}
11745

    
11746
      for node in all_nodes.values():
11747
        if node.group in group_to_nodes:
11748
          group_to_nodes[node.group].append(node.name)
11749
          node_to_group[node.name] = node.group
11750

    
11751
      if do_instances:
11752
        all_instances = lu.cfg.GetAllInstancesInfo()
11753
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
11754

    
11755
        for instance in all_instances.values():
11756
          node = instance.primary_node
11757
          if node in node_to_group:
11758
            group_to_instances[node_to_group[node]].append(instance.name)
11759

    
11760
        if not do_nodes:
11761
          # Do not pass on node information if it was not requested.
11762
          group_to_nodes = None
11763

    
11764
    return query.GroupQueryData([self._all_groups[uuid]
11765
                                 for uuid in self.wanted],
11766
                                group_to_nodes, group_to_instances)
11767

    
11768

    
11769
class LUGroupQuery(NoHooksLU):
11770
  """Logical unit for querying node groups.
11771

11772
  """
11773
  REQ_BGL = False
11774

    
11775
  def CheckArguments(self):
11776
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11777
                          self.op.output_fields, False)
11778

    
11779
  def ExpandNames(self):
11780
    self.gq.ExpandNames(self)
11781

    
11782
  def Exec(self, feedback_fn):
11783
    return self.gq.OldStyleQuery(self)
11784

    
11785

    
11786
class LUGroupSetParams(LogicalUnit):
11787
  """Modifies the parameters of a node group.
11788

11789
  """
11790
  HPATH = "group-modify"
11791
  HTYPE = constants.HTYPE_GROUP
11792
  REQ_BGL = False
11793

    
11794
  def CheckArguments(self):
11795
    all_changes = [
11796
      self.op.ndparams,
11797
      self.op.alloc_policy,
11798
      ]
11799

    
11800
    if all_changes.count(None) == len(all_changes):
11801
      raise errors.OpPrereqError("Please pass at least one modification",
11802
                                 errors.ECODE_INVAL)
11803

    
11804
  def ExpandNames(self):
11805
    # This raises errors.OpPrereqError on its own:
11806
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11807

    
11808
    self.needed_locks = {
11809
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11810
      }
11811

    
11812
  def CheckPrereq(self):
11813
    """Check prerequisites.
11814

11815
    """
11816
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11817

    
11818
    if self.group is None:
11819
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11820
                               (self.op.group_name, self.group_uuid))
11821

    
11822
    if self.op.ndparams:
11823
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11824
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11825
      self.new_ndparams = new_ndparams
11826

    
11827
  def BuildHooksEnv(self):
11828
    """Build hooks env.
11829

11830
    """
11831
    return {
11832
      "GROUP_NAME": self.op.group_name,
11833
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
11834
      }
11835

    
11836
  def BuildHooksNodes(self):
11837
    """Build hooks nodes.
11838

11839
    """
11840
    mn = self.cfg.GetMasterNode()
11841
    return ([mn], [mn])
11842

    
11843
  def Exec(self, feedback_fn):
11844
    """Modifies the node group.
11845

11846
    """
11847
    result = []
11848

    
11849
    if self.op.ndparams:
11850
      self.group.ndparams = self.new_ndparams
11851
      result.append(("ndparams", str(self.group.ndparams)))
11852

    
11853
    if self.op.alloc_policy:
11854
      self.group.alloc_policy = self.op.alloc_policy
11855

    
11856
    self.cfg.Update(self.group, feedback_fn)
11857
    return result
11858

    
11859

    
11860

    
11861
class LUGroupRemove(LogicalUnit):
11862
  HPATH = "group-remove"
11863
  HTYPE = constants.HTYPE_GROUP
11864
  REQ_BGL = False
11865

    
11866
  def ExpandNames(self):
11867
    # This will raises errors.OpPrereqError on its own:
11868
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11869
    self.needed_locks = {
11870
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11871
      }
11872

    
11873
  def CheckPrereq(self):
11874
    """Check prerequisites.
11875

11876
    This checks that the given group name exists as a node group, that is
11877
    empty (i.e., contains no nodes), and that is not the last group of the
11878
    cluster.
11879

11880
    """
11881
    # Verify that the group is empty.
11882
    group_nodes = [node.name
11883
                   for node in self.cfg.GetAllNodesInfo().values()
11884
                   if node.group == self.group_uuid]
11885

    
11886
    if group_nodes:
11887
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
11888
                                 " nodes: %s" %
11889
                                 (self.op.group_name,
11890
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
11891
                                 errors.ECODE_STATE)
11892

    
11893
    # Verify the cluster would not be left group-less.
11894
    if len(self.cfg.GetNodeGroupList()) == 1:
11895
      raise errors.OpPrereqError("Group '%s' is the only group,"
11896
                                 " cannot be removed" %
11897
                                 self.op.group_name,
11898
                                 errors.ECODE_STATE)
11899

    
11900
  def BuildHooksEnv(self):
11901
    """Build hooks env.
11902

11903
    """
11904
    return {
11905
      "GROUP_NAME": self.op.group_name,
11906
      }
11907

    
11908
  def BuildHooksNodes(self):
11909
    """Build hooks nodes.
11910

11911
    """
11912
    mn = self.cfg.GetMasterNode()
11913
    return ([mn], [mn])
11914

    
11915
  def Exec(self, feedback_fn):
11916
    """Remove the node group.
11917

11918
    """
11919
    try:
11920
      self.cfg.RemoveNodeGroup(self.group_uuid)
11921
    except errors.ConfigurationError:
11922
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11923
                               (self.op.group_name, self.group_uuid))
11924

    
11925
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11926

    
11927

    
11928
class LUGroupRename(LogicalUnit):
11929
  HPATH = "group-rename"
11930
  HTYPE = constants.HTYPE_GROUP
11931
  REQ_BGL = False
11932

    
11933
  def ExpandNames(self):
11934
    # This raises errors.OpPrereqError on its own:
11935
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11936

    
11937
    self.needed_locks = {
11938
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11939
      }
11940

    
11941
  def CheckPrereq(self):
11942
    """Check prerequisites.
11943

11944
    Ensures requested new name is not yet used.
11945

11946
    """
11947
    try:
11948
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11949
    except errors.OpPrereqError:
11950
      pass
11951
    else:
11952
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11953
                                 " node group (UUID: %s)" %
11954
                                 (self.op.new_name, new_name_uuid),
11955
                                 errors.ECODE_EXISTS)
11956

    
11957
  def BuildHooksEnv(self):
11958
    """Build hooks env.
11959

11960
    """
11961
    return {
11962
      "OLD_NAME": self.op.group_name,
11963
      "NEW_NAME": self.op.new_name,
11964
      }
11965

    
11966
  def BuildHooksNodes(self):
11967
    """Build hooks nodes.
11968

11969
    """
11970
    mn = self.cfg.GetMasterNode()
11971

    
11972
    all_nodes = self.cfg.GetAllNodesInfo()
11973
    all_nodes.pop(mn, None)
11974

    
11975
    run_nodes = [mn]
11976
    run_nodes.extend(node.name for node in all_nodes.values()
11977
                     if node.group == self.group_uuid)
11978

    
11979
    return (run_nodes, run_nodes)
11980

    
11981
  def Exec(self, feedback_fn):
11982
    """Rename the node group.
11983

11984
    """
11985
    group = self.cfg.GetNodeGroup(self.group_uuid)
11986

    
11987
    if group is None:
11988
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11989
                               (self.op.group_name, self.group_uuid))
11990

    
11991
    group.name = self.op.new_name
11992
    self.cfg.Update(group, feedback_fn)
11993

    
11994
    return self.op.new_name
11995

    
11996

    
11997
class LUGroupEvacuate(LogicalUnit):
11998
  HPATH = "group-evacuate"
11999
  HTYPE = constants.HTYPE_GROUP
12000
  REQ_BGL = False
12001

    
12002
  def ExpandNames(self):
12003
    # This raises errors.OpPrereqError on its own:
12004
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12005

    
12006
    if self.op.target_groups:
12007
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12008
                                  self.op.target_groups)
12009
    else:
12010
      self.req_target_uuids = []
12011

    
12012
    if self.group_uuid in self.req_target_uuids:
12013
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12014
                                 " as a target group (targets are %s)" %
12015
                                 (self.group_uuid,
12016
                                  utils.CommaJoin(self.req_target_uuids)),
12017
                                 errors.ECODE_INVAL)
12018

    
12019
    if not self.op.iallocator:
12020
      # Use default iallocator
12021
      self.op.iallocator = self.cfg.GetDefaultIAllocator()
12022

    
12023
    if not self.op.iallocator:
12024
      raise errors.OpPrereqError("No iallocator was specified, neither in the"
12025
                                 " opcode nor as a cluster-wide default",
12026
                                 errors.ECODE_INVAL)
12027

    
12028
    self.share_locks = _ShareAll()
12029
    self.needed_locks = {
12030
      locking.LEVEL_INSTANCE: [],
12031
      locking.LEVEL_NODEGROUP: [],
12032
      locking.LEVEL_NODE: [],
12033
      }
12034

    
12035
  def DeclareLocks(self, level):
12036
    if level == locking.LEVEL_INSTANCE:
12037
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12038

    
12039
      # Lock instances optimistically, needs verification once node and group
12040
      # locks have been acquired
12041
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12042
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12043

    
12044
    elif level == locking.LEVEL_NODEGROUP:
12045
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12046

    
12047
      if self.req_target_uuids:
12048
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12049

    
12050
        # Lock all groups used by instances optimistically; this requires going
12051
        # via the node before it's locked, requiring verification later on
12052
        lock_groups.update(group_uuid
12053
                           for instance_name in
12054
                             self.glm.list_owned(locking.LEVEL_INSTANCE)
12055
                           for group_uuid in
12056
                             self.cfg.GetInstanceNodeGroups(instance_name))
12057
      else:
12058
        # No target groups, need to lock all of them
12059
        lock_groups = locking.ALL_SET
12060

    
12061
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12062

    
12063
    elif level == locking.LEVEL_NODE:
12064
      # This will only lock the nodes in the group to be evacuated which
12065
      # contain actual instances
12066
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12067
      self._LockInstancesNodes()
12068

    
12069
      # Lock all nodes in group to be evacuated
12070
      assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
    if owned_instances != wanted_instances:
      raise errors.OpPrereqError("Instances in node group to be evacuated (%s)"
                                 " changed since locks were acquired, wanted"
                                 " %s, have %s; retry the operation" %
                                 (self.group_uuid,
                                  utils.CommaJoin(wanted_instances),
                                  utils.CommaJoin(owned_instances)),
                                 errors.ECODE_STATE)

    # Get instance information
    self.instances = dict((name, self.cfg.GetInstanceInfo(name))
                          for name in owned_instances)

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
      if not owned_groups.issuperset(inst_groups):
        raise errors.OpPrereqError("Instance %s's node groups changed since"
                                   " locks were acquired, current groups"
                                   " are '%s', owning groups '%s'; retry the"
                                   " operation" %
                                   (instance_name,
                                    utils.CommaJoin(inst_groups),
                                    utils.CommaJoin(owned_groups)),
                                   errors.ECODE_STATE)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpExecError("There are no possible target groups")

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.glm.list_owned(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


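# NOTE (illustrative sketch only, not used by the LU above): the "jobs" value
# produced by _LoadNodeEvacResult and wrapped in ResultWithJobs is a list of
# job definitions, each job being a list of opcodes (see _JOB_LIST in the
# IAllocator class below). A change-group result could, hypothetically,
# translate into something like:
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1.example.com")],
#     [opcodes.OpInstanceFailover(instance_name="inst2.example.com")],
#     ]
#   return ResultWithJobs(jobs)
#
# The actual opcodes and their parameters are chosen by the iallocator plugin;
# the instance names above are only examples.

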
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the list of (path, tag) pairs matching the pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


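# Illustrative sketch only (not part of the LU above): LUTagsSearch walks
# (path, object) pairs and keeps every tag matching the compiled pattern.
# For a hypothetical cluster where node "node1" carries the tags
# {"rack:r1", "site:ams"}, a search for the pattern "^rack:" would return
#
#   [("/nodes/node1", "rack:r1")]
#
# i.e. a list of (path, tag) tuples, which is what Exec returns.

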
class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


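# Illustrative sketch only: LUTagsDel.CheckPrereq refuses to remove tags the
# object does not carry. For a hypothetical target with
# cur_tags = set(["a", "b"]) and a request to delete ["b", "c"]:
#
#   del_tags = frozenset(["b", "c"])
#   diff_tags = del_tags - cur_tags      # frozenset(["c"])
#
# diff_tags is non-empty, so the LU raises OpPrereqError("Tag(s) 'c' not
# found", ...) instead of silently ignoring the unknown tag.

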
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


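# Illustrative sketch only: the repeat handling above means repeat=0 still
# sleeps once, while repeat=N (N > 0) sleeps N times. For a hypothetical
# opcode with repeat=3 the loop behaves like:
#
#   for i in range(3):                     # self.op.repeat == 3
#     self.LogInfo("Test delay iteration %d/%d" % (i, 2))
#     self._TestDelay()                    # sleeps self.op.duration seconds
#
# so the log shows iterations 0/2, 1/2 and 2/2 (zero-based over repeat - 1).

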
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

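  # Minimal standalone sketch of the same "wait for a peer over a throw-away
  # Unix socket" pattern used by _NotifyUsingSocket above (assumes nothing
  # from Ganeti; "announce" is a hypothetical callback supplied by the test):
  #
  #   import os, shutil, socket, tempfile
  #
  #   def _wait_for_peer(announce, timeout=20.0):
  #     tmpdir = tempfile.mkdtemp()
  #     try:
  #       path = os.path.join(tmpdir, "sock")
  #       server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #       try:
  #         server.bind(path)
  #         server.listen(1)
  #         announce(path)            # tell the peer where to connect
  #         server.settimeout(timeout)
  #         (conn, _) = server.accept()
  #       finally:
  #         server.close()
  #     finally:
  #       shutil.rmtree(tmpdir)       # the socket file is no longer needed
  #     return conn                   # caller reads the confirmation byte
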
  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

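  # Typical usage (sketch only, mirroring LUGroupEvacuate.Exec above and
  # LUTestAllocator.Exec further down; "hail" and the instance/group names
  # are just examples):
  #
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
  #                    instances=["inst1.example.com"],
  #                    target_groups=["uuid-of-target-group"])
  #   ial.Run("hail")                  # runs the external script via RPC
  #   if not ial.success:
  #     raise errors.OpExecError("iallocator failed: %s" % ial.info)
  #   # ial.result now holds the mode-specific, validated result
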
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @return: a dict mapping node names to dicts of static node attributes

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data, merging runtime information into the
    static per-node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

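  # Worked example (hypothetical numbers) for the memory accounting above:
  # a primary instance with a beparams memory of 1024 MiB that the hypervisor
  # currently reports as using 768 MiB contributes
  #
  #   i_mem_diff = 1024 - 768                          # 256 MiB
  #   remote_info["memory_free"] -= max(0, i_mem_diff)
  #
  # so free_memory is reduced by the 256 MiB the instance could still claim,
  # while i_pri_memory grows by the full 1024 MiB.
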
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

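  # Sketch of the request built above for IALLOCATOR_MODE_CHG_GROUP (values
  # are made up; the keys and their types come from _MODE_DATA below):
  #
  #   {
  #     "type": constants.IALLOCATOR_MODE_CHG_GROUP,
  #     "instances": ["inst1.example.com", "inst2.example.com"],
  #     "target_groups": ["uuid-of-target-group"],
  #   }
  #
  # Each (keyname, keytype) pair in keydata is checked against this dict, so
  # a missing key or a value of the wrong type is a ProgrammerError rather
  # than bad input silently passed to the external script.
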
  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable-msg=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_MEVAC:
      (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
       ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other result attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result))

    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
                     constants.IALLOCATOR_MODE_MEVAC):
      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      if self.mode == constants.IALLOCATOR_MODE_RELOC:
        assert self.relocate_from is not None
        assert self.required_nodes == 1

        request_groups = fn(self.relocate_from)
        result_groups = fn(rdict["result"])

        if result_groups != request_groups:
          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                   " differ from original groups (%s)" %
                                   (utils.CommaJoin(result_groups),
                                    utils.CommaJoin(request_groups)))
      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
        request_groups = fn(self.evac_nodes)
        for (instance_name, secnode) in self.result:
          result_groups = fn([secnode])
          if result_groups != request_groups:
            raise errors.OpExecError("Iallocator returned new secondary node"
                                     " '%s' (group '%s') for instance '%s'"
                                     " which is not in original group '%s'" %
                                     (secnode, utils.CommaJoin(result_groups),
                                      instance_name,
                                      utils.CommaJoin(request_groups)))
      else:
        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

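  # A well-formed allocator reply, as checked above, is a dict with at least
  # the keys "success", "info" and "result" (illustrative values only):
  #
  #   {
  #     "success": True,
  #     "info": "allocation successful",
  #     "result": ...,   # mode-specific; must satisfy self._result_check
  #   }
  #
  # Replies that still use the legacy "nodes" key are renamed to "result"
  # before validation.
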
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
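
  # Example (hypothetical data): with
  #
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
  #   groups = {"uuid-a": {"name": "default"}}
  #
  # _NodesToGroups(node2group, groups, ["node1", "node2", "ghost"]) returns
  # ["default", "uuid-b"]: unknown nodes are skipped and a group without an
  # entry in "groups" falls back to its UUID.

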
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode '%s' in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
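

# Usage sketch (illustrative only):
#
#   impl_cls = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#   _GetQueryImplementation("bogus")   # raises OpPrereqError (ECODE_INVAL)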