
root / lib / cmdlib.py @ 9f039737


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62

    
63
import ganeti.masterd.instance # pylint: disable-msg=W0611
64

    
65

    
66
class ResultWithJobs:
67
  """Data container for LU results with jobs.
68

69
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71
  contained in the C{jobs} attribute and include the job IDs in the opcode
72
  result.
73

74
  """
75
  def __init__(self, jobs, **kwargs):
76
    """Initializes this class.
77

78
    Additional return values can be specified as keyword arguments.
79

80
    @type jobs: list of lists of L{opcodes.OpCode}
81
    @param jobs: A list of lists of opcode objects
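
    Example (illustrative only; C{OpTestDelay} is merely used as a convenient
    placeholder opcode here, and C{custom_field} is an arbitrary name)::

      return ResultWithJobs([[opcodes.OpTestDelay(duration=0)]],
                            custom_field="some value")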
82

83
    """
84
    self.jobs = jobs
85
    self.other = kwargs
86

    
87

    
88
class LogicalUnit(object):
89
  """Logical Unit base class.
90

91
  Subclasses must follow these rules:
92
    - implement ExpandNames
93
    - implement CheckPrereq (except when tasklets are used)
94
    - implement Exec (except when tasklets are used)
95
    - implement BuildHooksEnv
96
    - implement BuildHooksNodes
97
    - redefine HPATH and HTYPE
98
    - optionally redefine their run requirements:
99
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
100

101
  Note that all commands require root permissions.
102

103
  @ivar dry_run_result: the value (if any) that will be returned to the caller
104
      in dry-run mode (signalled by opcode dry_run parameter)
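
  Example (a minimal, purely illustrative skeleton; the class name is made up
  and the opcode it would serve is not shown)::

    class LUMyNoop(NoHooksLU):
      def ExpandNames(self):
        self.needed_locks = {}

      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("Doing nothing")
        return True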
105

106
  """
107
  HPATH = None
108
  HTYPE = None
109
  REQ_BGL = True
110

    
111
  def __init__(self, processor, op, context, rpc):
112
    """Constructor for LogicalUnit.
113

114
    This needs to be overridden in derived classes in order to check op
115
    validity.
116

117
    """
118
    self.proc = processor
119
    self.op = op
120
    self.cfg = context.cfg
121
    self.glm = context.glm
122
    self.context = context
123
    self.rpc = rpc
124
    # Dicts used to declare locking needs to mcpu
125
    self.needed_locks = None
126
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
127
    self.add_locks = {}
128
    self.remove_locks = {}
129
    # Used to force good behavior when calling helper functions
130
    self.recalculate_locks = {}
131
    # logging
132
    self.Log = processor.Log # pylint: disable-msg=C0103
133
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
134
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
135
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
136
    # support for dry-run
137
    self.dry_run_result = None
138
    # support for generic debug attribute
139
    if (not hasattr(self.op, "debug_level") or
140
        not isinstance(self.op.debug_level, int)):
141
      self.op.debug_level = 0
142

    
143
    # Tasklets
144
    self.tasklets = None
145

    
146
    # Validate opcode parameters and set defaults
147
    self.op.Validate(True)
148

    
149
    self.CheckArguments()
150

    
151
  def CheckArguments(self):
152
    """Check syntactic validity for the opcode arguments.
153

154
    This method is for doing a simple syntactic check and ensuring the
155
    validity of opcode parameters, without any cluster-related
156
    checks. While the same can be accomplished in ExpandNames and/or
157
    CheckPrereq, doing these separately is better because:
158

159
      - ExpandNames is left as purely a lock-related function
160
      - CheckPrereq is run after we have acquired locks (and possibly
161
        waited for them)
162

163
    The function is allowed to change the self.op attribute so that
164
    later methods no longer need to worry about missing parameters.
165

166
    """
167
    pass
168

    
169
  def ExpandNames(self):
170
    """Expand names for this LU.
171

172
    This method is called before starting to execute the opcode, and it should
173
    update all the parameters of the opcode to their canonical form (e.g. a
174
    short node name must be fully expanded after this method has successfully
175
    completed). This way locking, hooks, logging, etc. can work correctly.
176

177
    LUs which implement this method must also populate the self.needed_locks
178
    member, as a dict with lock levels as keys, and a list of needed lock names
179
    as values. Rules:
180

181
      - use an empty dict if you don't need any lock
182
      - if you don't need any lock at a particular level omit that level
183
      - don't put anything for the BGL level
184
      - if you want all locks at a level use locking.ALL_SET as a value
185

186
    If you need to share locks (rather than acquire them exclusively) at one
187
    level you can modify self.share_locks, setting a true value (usually 1) for
188
    that level. By default locks are not shared.
189

190
    This function can also define a list of tasklets, which then will be
191
    executed in order instead of the usual LU-level CheckPrereq and Exec
192
    functions, if those are not defined by the LU.
193

194
    Examples::
195

196
      # Acquire all nodes and one instance
197
      self.needed_locks = {
198
        locking.LEVEL_NODE: locking.ALL_SET,
199
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
200
      }
201
      # Acquire just two nodes
202
      self.needed_locks = {
203
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
204
      }
205
      # Acquire no locks
206
      self.needed_locks = {} # No, you can't leave it to the default value None
207

208
    """
209
    # The implementation of this method is mandatory only if the new LU is
210
    # concurrent, so that old LUs don't need to be changed all at the same
211
    # time.
212
    if self.REQ_BGL:
213
      self.needed_locks = {} # Exclusive LUs don't need locks.
214
    else:
215
      raise NotImplementedError
216

    
217
  def DeclareLocks(self, level):
218
    """Declare LU locking needs for a level
219

220
    While most LUs can just declare their locking needs at ExpandNames time,
221
    sometimes there's the need to calculate some locks after having acquired
222
    the ones before. This function is called just before acquiring locks at a
223
    particular level, but after acquiring the ones at lower levels, and permits
224
    such calculations. It can be used to modify self.needed_locks, and by
225
    default it does nothing.
226

227
    This function is only called if you have something already set in
228
    self.needed_locks for the level.
229

230
    @param level: Locking level which is going to be locked
231
    @type level: member of ganeti.locking.LEVELS
232

233
    """
234

    
235
  def CheckPrereq(self):
236
    """Check prerequisites for this LU.
237

238
    This method should check that the prerequisites for the execution
239
    of this LU are fulfilled. It can do internode communication, but
240
    it should be idempotent - no cluster or system changes are
241
    allowed.
242

243
    The method should raise errors.OpPrereqError in case something is
244
    not fulfilled. Its return value is ignored.
245

246
    This method should also update all the parameters of the opcode to
247
    their canonical form if it hasn't been done by ExpandNames before.
248

249
    """
250
    if self.tasklets is not None:
251
      for (idx, tl) in enumerate(self.tasklets):
252
        logging.debug("Checking prerequisites for tasklet %s/%s",
253
                      idx + 1, len(self.tasklets))
254
        tl.CheckPrereq()
255
    else:
256
      pass
257

    
258
  def Exec(self, feedback_fn):
259
    """Execute the LU.
260

261
    This method should implement the actual work. It should raise
262
    errors.OpExecError for failures that are somewhat dealt with in
263
    code, or expected.
264

265
    """
266
    if self.tasklets is not None:
267
      for (idx, tl) in enumerate(self.tasklets):
268
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
269
        tl.Exec(feedback_fn)
270
    else:
271
      raise NotImplementedError
272

    
273
  def BuildHooksEnv(self):
274
    """Build hooks environment for this LU.
275

276
    @rtype: dict
277
    @return: Dictionary containing the environment that will be used for
278
      running the hooks for this LU. The keys of the dict must not be prefixed
279
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
280
      will extend the environment with additional variables. If no environment
281
      should be defined, an empty dictionary should be returned (not C{None}).
282
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
283
      will not be called.
284

285
    """
286
    raise NotImplementedError
287

    
288
  def BuildHooksNodes(self):
289
    """Build list of nodes to run LU's hooks.
290

291
    @rtype: tuple; (list, list)
292
    @return: Tuple containing a list of node names on which the hook
293
      should run before the execution and a list of node names on which the
294
      hook should run after the execution. No nodes should be returned as an
295
      empty list (and not None).
296
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
297
      will not be called.
298

299
    """
300
    raise NotImplementedError
301

    
302
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
303
    """Notify the LU about the results of its hooks.
304

305
    This method is called every time a hooks phase is executed, and notifies
306
    the Logical Unit about the hooks' result. The LU can then use it to alter
307
    its result based on the hooks.  By default the method does nothing and the
308
    previous result is passed back unchanged but any LU can define it if it
309
    wants to use the local cluster hook-scripts somehow.
310

311
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
312
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
313
    @param hook_results: the results of the multi-node hooks rpc call
314
    @param feedback_fn: function used to send feedback back to the caller
315
    @param lu_result: the previous Exec result this LU had, or None
316
        in the PRE phase
317
    @return: the new Exec result, based on the previous result
318
        and hook results
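
    Example (an illustrative override; reacting only to the POST phase is
    just one possible policy)::

      def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
        if phase == constants.HOOKS_PHASE_POST:
          feedback_fn("Hooks have run")
        return lu_result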
319

320
    """
321
    # API must be kept, thus we ignore the unused argument and the
322
    # "could be a function" warnings
323
    # pylint: disable-msg=W0613,R0201
324
    return lu_result
325

    
326
  def _ExpandAndLockInstance(self):
327
    """Helper function to expand and lock an instance.
328

329
    Many LUs that work on an instance take its name in self.op.instance_name
330
    and need to expand it and then declare the expanded name for locking. This
331
    function does it, and then updates self.op.instance_name to the expanded
332
    name. It also initializes needed_locks as a dict, if this hasn't been done
333
    before.
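
    Example (sketch of typical use from an LU's ExpandNames)::

      def ExpandNames(self):
        self._ExpandAndLockInstance()
        # self.op.instance_name now holds the fully-expanded name and the
        # instance-level lock has been requested via self.needed_locks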
334

335
    """
336
    if self.needed_locks is None:
337
      self.needed_locks = {}
338
    else:
339
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
340
        "_ExpandAndLockInstance called with instance-level locks set"
341
    self.op.instance_name = _ExpandInstanceName(self.cfg,
342
                                                self.op.instance_name)
343
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
344

    
345
  def _LockInstancesNodes(self, primary_only=False):
346
    """Helper function to declare instances' nodes for locking.
347

348
    This function should be called after locking one or more instances to lock
349
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
350
    with all primary or secondary nodes for instances already locked and
351
    present in self.needed_locks[locking.LEVEL_INSTANCE].
352

353
    It should be called from DeclareLocks, and for safety only works if
354
    self.recalculate_locks[locking.LEVEL_NODE] is set.
355

356
    In the future it may grow parameters to just lock some instance's nodes, or
357
    to just lock primaries or secondary nodes, if needed.
358

359
    It should be called in DeclareLocks in a way similar to::
360

361
      if level == locking.LEVEL_NODE:
362
        self._LockInstancesNodes()
363

364
    @type primary_only: boolean
365
    @param primary_only: only lock primary nodes of locked instances
366

367
    """
368
    assert locking.LEVEL_NODE in self.recalculate_locks, \
369
      "_LockInstancesNodes helper function called with no nodes to recalculate"
370

    
371
    # TODO: check if we've really been called with the instance locks held
372

    
373
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
374
    # future we might want to have different behaviors depending on the value
375
    # of self.recalculate_locks[locking.LEVEL_NODE]
376
    wanted_nodes = []
377
    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
378
      instance = self.context.cfg.GetInstanceInfo(instance_name)
379
      wanted_nodes.append(instance.primary_node)
380
      if not primary_only:
381
        wanted_nodes.extend(instance.secondary_nodes)
382

    
383
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
384
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
385
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
386
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
387

    
388
    del self.recalculate_locks[locking.LEVEL_NODE]
389

    
390

    
391
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
392
  """Simple LU which runs no hooks.
393

394
  This LU is intended as a parent for other LogicalUnits which will
395
  run no hooks, in order to reduce duplicate code.
396

397
  """
398
  HPATH = None
399
  HTYPE = None
400

    
401
  def BuildHooksEnv(self):
402
    """Empty BuildHooksEnv for NoHooksLu.
403

404
    This just raises an error.
405

406
    """
407
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
408

    
409
  def BuildHooksNodes(self):
410
    """Empty BuildHooksNodes for NoHooksLU.
411

412
    """
413
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
414

    
415

    
416
class Tasklet:
417
  """Tasklet base class.
418

419
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
420
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
421
  tasklets know nothing about locks.
422

423
  Subclasses must follow these rules:
424
    - Implement CheckPrereq
425
    - Implement Exec
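
  Example (illustrative skeleton only; the tasklet name and its extra
  argument are made up)::

    class TLDoSomething(Tasklet):
      def __init__(self, lu, instance_name):
        Tasklet.__init__(self, lu)
        self.instance_name = instance_name

      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("Working on %s" % self.instance_name)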
426

427
  """
428
  def __init__(self, lu):
429
    self.lu = lu
430

    
431
    # Shortcuts
432
    self.cfg = lu.cfg
433
    self.rpc = lu.rpc
434

    
435
  def CheckPrereq(self):
436
    """Check prerequisites for this tasklets.
437

438
    This method should check whether the prerequisites for the execution of
439
    this tasklet are fulfilled. It can do internode communication, but it
440
    should be idempotent - no cluster or system changes are allowed.
441

442
    The method should raise errors.OpPrereqError in case something is not
443
    fulfilled. Its return value is ignored.
444

445
    This method should also update all parameters to their canonical form if it
446
    hasn't been done before.
447

448
    """
449
    pass
450

    
451
  def Exec(self, feedback_fn):
452
    """Execute the tasklet.
453

454
    This method should implement the actual work. It should raise
455
    errors.OpExecError for failures that are somewhat dealt with in code, or
456
    expected.
457

458
    """
459
    raise NotImplementedError
460

    
461

    
462
class _QueryBase:
463
  """Base for query utility classes.
464

465
  """
466
  #: Attribute holding field definitions
467
  FIELDS = None
468

    
469
  def __init__(self, filter_, fields, use_locking):
470
    """Initializes this class.
471

472
    """
473
    self.use_locking = use_locking
474

    
475
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
476
                             namefield="name")
477
    self.requested_data = self.query.RequestedData()
478
    self.names = self.query.RequestedNames()
479

    
480
    # Sort only if no names were requested
481
    self.sort_by_name = not self.names
482

    
483
    self.do_locking = None
484
    self.wanted = None
485

    
486
  def _GetNames(self, lu, all_names, lock_level):
487
    """Helper function to determine names asked for in the query.
488

489
    """
490
    if self.do_locking:
491
      names = lu.glm.list_owned(lock_level)
492
    else:
493
      names = all_names
494

    
495
    if self.wanted == locking.ALL_SET:
496
      assert not self.names
497
      # caller didn't specify names, so ordering is not important
498
      return utils.NiceSort(names)
499

    
500
    # caller specified names and we must keep the same order
501
    assert self.names
502
    assert not self.do_locking or lu.glm.is_owned(lock_level)
503

    
504
    missing = set(self.wanted).difference(names)
505
    if missing:
506
      raise errors.OpExecError("Some items were removed before retrieving"
507
                               " their data: %s" % missing)
508

    
509
    # Return expanded names
510
    return self.wanted
511

    
512
  def ExpandNames(self, lu):
513
    """Expand names for this query.
514

515
    See L{LogicalUnit.ExpandNames}.
516

517
    """
518
    raise NotImplementedError()
519

    
520
  def DeclareLocks(self, lu, level):
521
    """Declare locks for this query.
522

523
    See L{LogicalUnit.DeclareLocks}.
524

525
    """
526
    raise NotImplementedError()
527

    
528
  def _GetQueryData(self, lu):
529
    """Collects all data for this query.
530

531
    @return: Query data object
532

533
    """
534
    raise NotImplementedError()
535

    
536
  def NewStyleQuery(self, lu):
537
    """Collect data and execute query.
538

539
    """
540
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
541
                                  sort_by_name=self.sort_by_name)
542

    
543
  def OldStyleQuery(self, lu):
544
    """Collect data and execute query.
545

546
    """
547
    return self.query.OldStyleQuery(self._GetQueryData(lu),
548
                                    sort_by_name=self.sort_by_name)
549

    
550

    
551
def _ShareAll():
552
  """Returns a dict declaring all lock levels shared.
553

554
  """
555
  return dict.fromkeys(locking.LEVELS, 1)
556

    
557

    
558
def _SupportsOob(cfg, node):
559
  """Tells if node supports OOB.
560

561
  @type cfg: L{config.ConfigWriter}
562
  @param cfg: The cluster configuration
563
  @type node: L{objects.Node}
564
  @param node: The node
565
  @return: The OOB script if supported or an empty string otherwise
566

567
  """
568
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
569

    
570

    
571
def _GetWantedNodes(lu, nodes):
572
  """Returns list of checked and expanded node names.
573

574
  @type lu: L{LogicalUnit}
575
  @param lu: the logical unit on whose behalf we execute
576
  @type nodes: list
577
  @param nodes: list of node names or None for all nodes
578
  @rtype: list
579
  @return: the list of nodes, sorted
580
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
581

582
  """
583
  if nodes:
584
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
585

    
586
  return utils.NiceSort(lu.cfg.GetNodeList())
587

    
588

    
589
def _GetWantedInstances(lu, instances):
590
  """Returns list of checked and expanded instance names.
591

592
  @type lu: L{LogicalUnit}
593
  @param lu: the logical unit on whose behalf we execute
594
  @type instances: list
595
  @param instances: list of instance names or None for all instances
596
  @rtype: list
597
  @return: the list of instances, sorted
598
  @raise errors.OpPrereqError: if the instances parameter is wrong type
599
  @raise errors.OpPrereqError: if any of the passed instances is not found
600

601
  """
602
  if instances:
603
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
604
  else:
605
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
606
  return wanted
607

    
608

    
609
def _GetUpdatedParams(old_params, update_dict,
610
                      use_default=True, use_none=False):
611
  """Return the new version of a parameter dictionary.
612

613
  @type old_params: dict
614
  @param old_params: old parameters
615
  @type update_dict: dict
616
  @param update_dict: dict containing new parameter values, or
617
      constants.VALUE_DEFAULT to reset the parameter to its default
618
      value
619
  @type use_default: boolean
620
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
621
      values as 'to be deleted' values
622
  @type use_none: boolean
623
  @param use_none: whether to recognise C{None} values as 'to be
624
      deleted' values
625
  @rtype: dict
626
  @return: the new parameter dictionary
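
  Example (parameter names and values are illustrative only)::

    old = {"mem": 128, "vcpus": 4}
    upd = {"mem": constants.VALUE_DEFAULT, "vcpus": 2}
    _GetUpdatedParams(old, upd)
    # => {"vcpus": 2}; "mem" is removed and thus reverts to its default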
627

628
  """
629
  params_copy = copy.deepcopy(old_params)
630
  for key, val in update_dict.iteritems():
631
    if ((use_default and val == constants.VALUE_DEFAULT) or
632
        (use_none and val is None)):
633
      try:
634
        del params_copy[key]
635
      except KeyError:
636
        pass
637
    else:
638
      params_copy[key] = val
639
  return params_copy
640

    
641

    
642
def _ReleaseLocks(lu, level, names=None, keep=None):
643
  """Releases locks owned by an LU.
644

645
  @type lu: L{LogicalUnit}
646
  @param level: Lock level
647
  @type names: list or None
648
  @param names: Names of locks to release
649
  @type keep: list or None
650
  @param keep: Names of locks to retain
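
  Example (sketch; the node names are illustrative)::

    # keep only the node locks that are still needed
    _ReleaseLocks(lu, locking.LEVEL_NODE, keep=["node1.example.com"])
    # or release an explicit set of node locks
    _ReleaseLocks(lu, locking.LEVEL_NODE, names=["node2.example.com"])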
651

652
  """
653
  assert not (keep is not None and names is not None), \
654
         "Only one of the 'names' and the 'keep' parameters can be given"
655

    
656
  if names is not None:
657
    should_release = names.__contains__
658
  elif keep:
659
    should_release = lambda name: name not in keep
660
  else:
661
    should_release = None
662

    
663
  if should_release:
664
    retain = []
665
    release = []
666

    
667
    # Determine which locks to release
668
    for name in lu.glm.list_owned(level):
669
      if should_release(name):
670
        release.append(name)
671
      else:
672
        retain.append(name)
673

    
674
    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
675

    
676
    # Release just some locks
677
    lu.glm.release(level, names=release)
678

    
679
    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
680
  else:
681
    # Release everything
682
    lu.glm.release(level)
683

    
684
    assert not lu.glm.is_owned(level), "No locks should be owned"
685

    
686

    
687
def _MapInstanceDisksToNodes(instances):
688
  """Creates a map from (node, volume) to instance name.
689

690
  @type instances: list of L{objects.Instance}
691
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
692

693
  """
694
  return dict(((node, vol), inst.name)
695
              for inst in instances
696
              for (node, vols) in inst.MapLVsByNode().items()
697
              for vol in vols)
698

    
699

    
700
def _RunPostHook(lu, node_name):
701
  """Runs the post-hook for an opcode on a single node.
702

703
  """
704
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
705
  try:
706
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
707
  except:
708
    # pylint: disable-msg=W0702
709
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
710

    
711

    
712
def _CheckOutputFields(static, dynamic, selected):
713
  """Checks whether all selected fields are valid.
714

715
  @type static: L{utils.FieldSet}
716
  @param static: static fields set
717
  @type dynamic: L{utils.FieldSet}
718
  @param dynamic: dynamic fields set
719

720
  """
721
  f = utils.FieldSet()
722
  f.Extend(static)
723
  f.Extend(dynamic)
724

    
725
  delta = f.NonMatching(selected)
726
  if delta:
727
    raise errors.OpPrereqError("Unknown output fields selected: %s"
728
                               % ",".join(delta), errors.ECODE_INVAL)
729

    
730

    
731
def _CheckGlobalHvParams(params):
732
  """Validates that given hypervisor params are not global ones.
733

734
  This will ensure that instances don't get customised versions of
735
  global params.
736

737
  """
738
  used_globals = constants.HVC_GLOBALS.intersection(params)
739
  if used_globals:
740
    msg = ("The following hypervisor parameters are global and cannot"
741
           " be customized at instance level, please modify them at"
742
           " cluster level: %s" % utils.CommaJoin(used_globals))
743
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
744

    
745

    
746
def _CheckNodeOnline(lu, node, msg=None):
747
  """Ensure that a given node is online.
748

749
  @param lu: the LU on behalf of which we make the check
750
  @param node: the node to check
751
  @param msg: if passed, should be a message to replace the default one
752
  @raise errors.OpPrereqError: if the node is offline
753

754
  """
755
  if msg is None:
756
    msg = "Can't use offline node"
757
  if lu.cfg.GetNodeInfo(node).offline:
758
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
759

    
760

    
761
def _CheckNodeNotDrained(lu, node):
762
  """Ensure that a given node is not drained.
763

764
  @param lu: the LU on behalf of which we make the check
765
  @param node: the node to check
766
  @raise errors.OpPrereqError: if the node is drained
767

768
  """
769
  if lu.cfg.GetNodeInfo(node).drained:
770
    raise errors.OpPrereqError("Can't use drained node %s" % node,
771
                               errors.ECODE_STATE)
772

    
773

    
774
def _CheckNodeVmCapable(lu, node):
775
  """Ensure that a given node is vm capable.
776

777
  @param lu: the LU on behalf of which we make the check
778
  @param node: the node to check
779
  @raise errors.OpPrereqError: if the node is not vm capable
780

781
  """
782
  if not lu.cfg.GetNodeInfo(node).vm_capable:
783
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
784
                               errors.ECODE_STATE)
785

    
786

    
787
def _CheckNodeHasOS(lu, node, os_name, force_variant):
788
  """Ensure that a node supports a given OS.
789

790
  @param lu: the LU on behalf of which we make the check
791
  @param node: the node to check
792
  @param os_name: the OS to query about
793
  @param force_variant: whether to ignore variant errors
794
  @raise errors.OpPrereqError: if the node is not supporting the OS
795

796
  """
797
  result = lu.rpc.call_os_get(node, os_name)
798
  result.Raise("OS '%s' not in supported OS list for node %s" %
799
               (os_name, node),
800
               prereq=True, ecode=errors.ECODE_INVAL)
801
  if not force_variant:
802
    _CheckOSVariant(result.payload, os_name)
803

    
804

    
805
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
806
  """Ensure that a node has the given secondary ip.
807

808
  @type lu: L{LogicalUnit}
809
  @param lu: the LU on behalf of which we make the check
810
  @type node: string
811
  @param node: the node to check
812
  @type secondary_ip: string
813
  @param secondary_ip: the ip to check
814
  @type prereq: boolean
815
  @param prereq: whether to throw a prerequisite or an execute error
816
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
817
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
818

819
  """
820
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
821
  result.Raise("Failure checking secondary ip on node %s" % node,
822
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
823
  if not result.payload:
824
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
825
           " please fix and re-run this command" % secondary_ip)
826
    if prereq:
827
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
828
    else:
829
      raise errors.OpExecError(msg)
830

    
831

    
832
def _GetClusterDomainSecret():
833
  """Reads the cluster domain secret.
834

835
  """
836
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
837
                               strict=True)
838

    
839

    
840
def _CheckInstanceDown(lu, instance, reason):
841
  """Ensure that an instance is not running."""
842
  if instance.admin_up:
843
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
844
                               (instance.name, reason), errors.ECODE_STATE)
845

    
846
  pnode = instance.primary_node
847
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
848
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
849
              prereq=True, ecode=errors.ECODE_ENVIRON)
850

    
851
  if instance.name in ins_l.payload:
852
    raise errors.OpPrereqError("Instance %s is running, %s" %
853
                               (instance.name, reason), errors.ECODE_STATE)
854

    
855

    
856
def _ExpandItemName(fn, name, kind):
857
  """Expand an item name.
858

859
  @param fn: the function to use for expansion
860
  @param name: requested item name
861
  @param kind: text description ('Node' or 'Instance')
862
  @return: the resolved (full) name
863
  @raise errors.OpPrereqError: if the item is not found
864

865
  """
866
  full_name = fn(name)
867
  if full_name is None:
868
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
869
                               errors.ECODE_NOENT)
870
  return full_name
871

    
872

    
873
def _ExpandNodeName(cfg, name):
874
  """Wrapper over L{_ExpandItemName} for nodes."""
875
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
876

    
877

    
878
def _ExpandInstanceName(cfg, name):
879
  """Wrapper over L{_ExpandItemName} for instance."""
880
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
881

    
882

    
883
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
884
                          memory, vcpus, nics, disk_template, disks,
885
                          bep, hvp, hypervisor_name, tags):
886
  """Builds instance related env variables for hooks
887

888
  This builds the hook environment from individual variables.
889

890
  @type name: string
891
  @param name: the name of the instance
892
  @type primary_node: string
893
  @param primary_node: the name of the instance's primary node
894
  @type secondary_nodes: list
895
  @param secondary_nodes: list of secondary nodes as strings
896
  @type os_type: string
897
  @param os_type: the name of the instance's OS
898
  @type status: boolean
899
  @param status: the should_run status of the instance
900
  @type memory: string
901
  @param memory: the memory size of the instance
902
  @type vcpus: string
903
  @param vcpus: the count of VCPUs the instance has
904
  @type nics: list
905
  @param nics: list of tuples (ip, mac, mode, link) representing
906
      the NICs the instance has
907
  @type disk_template: string
908
  @param disk_template: the disk template of the instance
909
  @type disks: list
910
  @param disks: the list of (size, mode) pairs
911
  @type bep: dict
912
  @param bep: the backend parameters for the instance
913
  @type hvp: dict
914
  @param hvp: the hypervisor parameters for the instance
915
  @type hypervisor_name: string
916
  @param hypervisor_name: the hypervisor for the instance
917
  @type tags: list
918
  @param tags: list of instance tags as strings
919
  @rtype: dict
920
  @return: the hook environment for this instance
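
  Example (abridged, with made-up values; only a few of the generated keys
  are shown)::

    env = _BuildInstanceHookEnv("inst1.example.com", "node1", [],
                                "debootstrap", True, 128, 1, [], "plain",
                                [(10240, "rw")], {}, {}, "xen-pvm", [])
    # env["INSTANCE_NAME"] == "inst1.example.com"
    # env["INSTANCE_STATUS"] == "up"
    # env["INSTANCE_DISK0_SIZE"] == 10240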
921

922
  """
923
  if status:
924
    str_status = "up"
925
  else:
926
    str_status = "down"
927
  env = {
928
    "OP_TARGET": name,
929
    "INSTANCE_NAME": name,
930
    "INSTANCE_PRIMARY": primary_node,
931
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
932
    "INSTANCE_OS_TYPE": os_type,
933
    "INSTANCE_STATUS": str_status,
934
    "INSTANCE_MEMORY": memory,
935
    "INSTANCE_VCPUS": vcpus,
936
    "INSTANCE_DISK_TEMPLATE": disk_template,
937
    "INSTANCE_HYPERVISOR": hypervisor_name,
938
  }
939

    
940
  if nics:
941
    nic_count = len(nics)
942
    for idx, (ip, mac, mode, link) in enumerate(nics):
943
      if ip is None:
944
        ip = ""
945
      env["INSTANCE_NIC%d_IP" % idx] = ip
946
      env["INSTANCE_NIC%d_MAC" % idx] = mac
947
      env["INSTANCE_NIC%d_MODE" % idx] = mode
948
      env["INSTANCE_NIC%d_LINK" % idx] = link
949
      if mode == constants.NIC_MODE_BRIDGED:
950
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
951
  else:
952
    nic_count = 0
953

    
954
  env["INSTANCE_NIC_COUNT"] = nic_count
955

    
956
  if disks:
957
    disk_count = len(disks)
958
    for idx, (size, mode) in enumerate(disks):
959
      env["INSTANCE_DISK%d_SIZE" % idx] = size
960
      env["INSTANCE_DISK%d_MODE" % idx] = mode
961
  else:
962
    disk_count = 0
963

    
964
  env["INSTANCE_DISK_COUNT"] = disk_count
965

    
966
  if not tags:
967
    tags = []
968

    
969
  env["INSTANCE_TAGS"] = " ".join(tags)
970

    
971
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
972
    for key, value in source.items():
973
      env["INSTANCE_%s_%s" % (kind, key)] = value
974

    
975
  return env
976

    
977

    
978
def _NICListToTuple(lu, nics):
979
  """Build a list of nic information tuples.
980

981
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
982
  value in LUInstanceQueryData.
983

984
  @type lu:  L{LogicalUnit}
985
  @param lu: the logical unit on whose behalf we execute
986
  @type nics: list of L{objects.NIC}
987
  @param nics: list of nics to convert to hooks tuples
988

989
  """
990
  hooks_nics = []
991
  cluster = lu.cfg.GetClusterInfo()
992
  for nic in nics:
993
    ip = nic.ip
994
    mac = nic.mac
995
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
996
    mode = filled_params[constants.NIC_MODE]
997
    link = filled_params[constants.NIC_LINK]
998
    hooks_nics.append((ip, mac, mode, link))
999
  return hooks_nics
1000

    
1001

    
1002
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1003
  """Builds instance related env variables for hooks from an object.
1004

1005
  @type lu: L{LogicalUnit}
1006
  @param lu: the logical unit on whose behalf we execute
1007
  @type instance: L{objects.Instance}
1008
  @param instance: the instance for which we should build the
1009
      environment
1010
  @type override: dict
1011
  @param override: dictionary with key/values that will override
1012
      our values
1013
  @rtype: dict
1014
  @return: the hook environment dictionary
1015

1016
  """
1017
  cluster = lu.cfg.GetClusterInfo()
1018
  bep = cluster.FillBE(instance)
1019
  hvp = cluster.FillHV(instance)
1020
  args = {
1021
    "name": instance.name,
1022
    "primary_node": instance.primary_node,
1023
    "secondary_nodes": instance.secondary_nodes,
1024
    "os_type": instance.os,
1025
    "status": instance.admin_up,
1026
    "memory": bep[constants.BE_MEMORY],
1027
    "vcpus": bep[constants.BE_VCPUS],
1028
    "nics": _NICListToTuple(lu, instance.nics),
1029
    "disk_template": instance.disk_template,
1030
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1031
    "bep": bep,
1032
    "hvp": hvp,
1033
    "hypervisor_name": instance.hypervisor,
1034
    "tags": instance.tags,
1035
  }
1036
  if override:
1037
    args.update(override)
1038
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1039

    
1040

    
1041
def _AdjustCandidatePool(lu, exceptions):
1042
  """Adjust the candidate pool after node operations.
1043

1044
  """
1045
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1046
  if mod_list:
1047
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1048
               utils.CommaJoin(node.name for node in mod_list))
1049
    for name in mod_list:
1050
      lu.context.ReaddNode(name)
1051
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1052
  if mc_now > mc_max:
1053
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1054
               (mc_now, mc_max))
1055

    
1056

    
1057
def _DecideSelfPromotion(lu, exceptions=None):
1058
  """Decide whether I should promote myself as a master candidate.
1059

1060
  """
1061
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1062
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1063
  # the new node will increase mc_max by one, so:
1064
  mc_should = min(mc_should + 1, cp_size)
1065
  return mc_now < mc_should
1066

    
1067

    
1068
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1069
  """Check that the brigdes needed by a list of nics exist.
1070

1071
  """
1072
  cluster = lu.cfg.GetClusterInfo()
1073
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1074
  brlist = [params[constants.NIC_LINK] for params in paramslist
1075
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1076
  if brlist:
1077
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1078
    result.Raise("Error checking bridges on destination node '%s'" %
1079
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1080

    
1081

    
1082
def _CheckInstanceBridgesExist(lu, instance, node=None):
1083
  """Check that the brigdes needed by an instance exist.
1084

1085
  """
1086
  if node is None:
1087
    node = instance.primary_node
1088
  _CheckNicsBridgesExist(lu, instance.nics, node)
1089

    
1090

    
1091
def _CheckOSVariant(os_obj, name):
1092
  """Check whether an OS name conforms to the os variants specification.
1093

1094
  @type os_obj: L{objects.OS}
1095
  @param os_obj: OS object to check
1096
  @type name: string
1097
  @param name: OS name passed by the user, to check for validity
1098

1099
  """
1100
  if not os_obj.supported_variants:
1101
    return
1102
  variant = objects.OS.GetVariant(name)
1103
  if not variant:
1104
    raise errors.OpPrereqError("OS name must include a variant",
1105
                               errors.ECODE_INVAL)
1106

    
1107
  if variant not in os_obj.supported_variants:
1108
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1109

    
1110

    
1111
def _GetNodeInstancesInner(cfg, fn):
1112
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1113

    
1114

    
1115
def _GetNodeInstances(cfg, node_name):
1116
  """Returns a list of all primary and secondary instances on a node.
1117

1118
  """
1119

    
1120
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1121

    
1122

    
1123
def _GetNodePrimaryInstances(cfg, node_name):
1124
  """Returns primary instances on a node.
1125

1126
  """
1127
  return _GetNodeInstancesInner(cfg,
1128
                                lambda inst: node_name == inst.primary_node)
1129

    
1130

    
1131
def _GetNodeSecondaryInstances(cfg, node_name):
1132
  """Returns secondary instances on a node.
1133

1134
  """
1135
  return _GetNodeInstancesInner(cfg,
1136
                                lambda inst: node_name in inst.secondary_nodes)
1137

    
1138

    
1139
def _GetStorageTypeArgs(cfg, storage_type):
1140
  """Returns the arguments for a storage type.
1141

1142
  """
1143
  # Special case for file storage
1144
  if storage_type == constants.ST_FILE:
1145
    # storage.FileStorage wants a list of storage directories
1146
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1147

    
1148
  return []
1149

    
1150

    
1151
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1152
  faulty = []
1153

    
1154
  for dev in instance.disks:
1155
    cfg.SetDiskID(dev, node_name)
1156

    
1157
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1158
  result.Raise("Failed to get disk status from node %s" % node_name,
1159
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1160

    
1161
  for idx, bdev_status in enumerate(result.payload):
1162
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1163
      faulty.append(idx)
1164

    
1165
  return faulty
1166

    
1167

    
1168
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1169
  """Check the sanity of iallocator and node arguments and use the
1170
  cluster-wide iallocator if appropriate.
1171

1172
  Check that at most one of (iallocator, node) is specified. If none is
1173
  specified, then the LU's opcode's iallocator slot is filled with the
1174
  cluster-wide default iallocator.
1175

1176
  @type iallocator_slot: string
1177
  @param iallocator_slot: the name of the opcode iallocator slot
1178
  @type node_slot: string
1179
  @param node_slot: the name of the opcode target node slot
1180

1181
  """
1182
  node = getattr(lu.op, node_slot, None)
1183
  iallocator = getattr(lu.op, iallocator_slot, None)
1184

    
1185
  if node is not None and iallocator is not None:
1186
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1187
                               errors.ECODE_INVAL)
1188
  elif node is None and iallocator is None:
1189
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1190
    if default_iallocator:
1191
      setattr(lu.op, iallocator_slot, default_iallocator)
1192
    else:
1193
      raise errors.OpPrereqError("No iallocator or node given and no"
1194
                                 " cluster-wide default iallocator found;"
1195
                                 " please specify either an iallocator or a"
1196
                                 " node, or set a cluster-wide default"
1197
                                 " iallocator")
1198

    
1199

    
1200
class LUClusterPostInit(LogicalUnit):
1201
  """Logical unit for running hooks after cluster initialization.
1202

1203
  """
1204
  HPATH = "cluster-init"
1205
  HTYPE = constants.HTYPE_CLUSTER
1206

    
1207
  def BuildHooksEnv(self):
1208
    """Build hooks env.
1209

1210
    """
1211
    return {
1212
      "OP_TARGET": self.cfg.GetClusterName(),
1213
      }
1214

    
1215
  def BuildHooksNodes(self):
1216
    """Build hooks nodes.
1217

1218
    """
1219
    return ([], [self.cfg.GetMasterNode()])
1220

    
1221
  def Exec(self, feedback_fn):
1222
    """Nothing to do.
1223

1224
    """
1225
    return True
1226

    
1227

    
1228
class LUClusterDestroy(LogicalUnit):
1229
  """Logical unit for destroying the cluster.
1230

1231
  """
1232
  HPATH = "cluster-destroy"
1233
  HTYPE = constants.HTYPE_CLUSTER
1234

    
1235
  def BuildHooksEnv(self):
1236
    """Build hooks env.
1237

1238
    """
1239
    return {
1240
      "OP_TARGET": self.cfg.GetClusterName(),
1241
      }
1242

    
1243
  def BuildHooksNodes(self):
1244
    """Build hooks nodes.
1245

1246
    """
1247
    return ([], [])
1248

    
1249
  def CheckPrereq(self):
1250
    """Check prerequisites.
1251

1252
    This checks whether the cluster is empty.
1253

1254
    Any errors are signaled by raising errors.OpPrereqError.
1255

1256
    """
1257
    master = self.cfg.GetMasterNode()
1258

    
1259
    nodelist = self.cfg.GetNodeList()
1260
    if len(nodelist) != 1 or nodelist[0] != master:
1261
      raise errors.OpPrereqError("There are still %d node(s) in"
1262
                                 " this cluster." % (len(nodelist) - 1),
1263
                                 errors.ECODE_INVAL)
1264
    instancelist = self.cfg.GetInstanceList()
1265
    if instancelist:
1266
      raise errors.OpPrereqError("There are still %d instance(s) in"
1267
                                 " this cluster." % len(instancelist),
1268
                                 errors.ECODE_INVAL)
1269

    
1270
  def Exec(self, feedback_fn):
1271
    """Destroys the cluster.
1272

1273
    """
1274
    master = self.cfg.GetMasterNode()
1275

    
1276
    # Run post hooks on master node before it's removed
1277
    _RunPostHook(self, master)
1278

    
1279
    result = self.rpc.call_node_stop_master(master, False)
1280
    result.Raise("Could not disable the master role")
1281

    
1282
    return master
1283

    
1284

    
1285
def _VerifyCertificate(filename):
1286
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1287

1288
  @type filename: string
1289
  @param filename: Path to PEM file
1290

1291
  """
1292
  try:
1293
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1294
                                           utils.ReadFile(filename))
1295
  except Exception, err: # pylint: disable-msg=W0703
1296
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1297
            "Failed to load X509 certificate %s: %s" % (filename, err))
1298

    
1299
  (errcode, msg) = \
1300
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1301
                                constants.SSL_CERT_EXPIRATION_ERROR)
1302

    
1303
  if msg:
1304
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1305
  else:
1306
    fnamemsg = None
1307

    
1308
  if errcode is None:
1309
    return (None, fnamemsg)
1310
  elif errcode == utils.CERT_WARNING:
1311
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1312
  elif errcode == utils.CERT_ERROR:
1313
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1314

    
1315
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1316

    
1317

    
1318
def _GetAllHypervisorParameters(cluster, instances):
1319
  """Compute the set of all hypervisor parameters.
1320

1321
  @type cluster: L{objects.Cluster}
1322
  @param cluster: the cluster object
1323
  @param instances: list of L{objects.Instance}
1324
  @param instances: additional instances from which to obtain parameters
1325
  @rtype: list of (origin, hypervisor, parameters)
1326
  @return: a list with all parameters found, indicating the hypervisor they
1327
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
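
  Example (illustrative shape of a single returned element; the values are
  made up)::

    ("os debian-installer", "xen-pvm", {"kernel_path": "/boot/vmlinuz"})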
1328

1329
  """
1330
  hvp_data = []
1331

    
1332
  for hv_name in cluster.enabled_hypervisors:
1333
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1334

    
1335
  for os_name, os_hvp in cluster.os_hvp.items():
1336
    for hv_name, hv_params in os_hvp.items():
1337
      if hv_params:
1338
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1339
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1340

    
1341
  # TODO: collapse identical parameter values in a single one
1342
  for instance in instances:
1343
    if instance.hvparams:
1344
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1345
                       cluster.FillHV(instance)))
1346

    
1347
  return hvp_data
1348

    
1349

    
1350
class _VerifyErrors(object):
1351
  """Mix-in for cluster/group verify LUs.
1352

1353
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1354
  self.op and self._feedback_fn to be available.)
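
  Example (an illustrative call from within an Exec method; the error code,
  node name and message are made up)::

    self._ErrorIf(bool(msg), self.ENODERPC, node,
                  "error communicating with node: %s", msg)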
1355

1356
  """
1357
  TCLUSTER = "cluster"
1358
  TNODE = "node"
1359
  TINSTANCE = "instance"
1360

    
1361
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1362
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1363
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1364
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1365
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1366
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1367
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1368
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1369
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1370
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1371
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1372
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1373
  ENODEDRBD = (TNODE, "ENODEDRBD")
1374
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1375
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1376
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1377
  ENODEHV = (TNODE, "ENODEHV")
1378
  ENODELVM = (TNODE, "ENODELVM")
1379
  ENODEN1 = (TNODE, "ENODEN1")
1380
  ENODENET = (TNODE, "ENODENET")
1381
  ENODEOS = (TNODE, "ENODEOS")
1382
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1383
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1384
  ENODERPC = (TNODE, "ENODERPC")
1385
  ENODESSH = (TNODE, "ENODESSH")
1386
  ENODEVERSION = (TNODE, "ENODEVERSION")
1387
  ENODESETUP = (TNODE, "ENODESETUP")
1388
  ENODETIME = (TNODE, "ENODETIME")
1389
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1390

    
1391
  ETYPE_FIELD = "code"
1392
  ETYPE_ERROR = "ERROR"
1393
  ETYPE_WARNING = "WARNING"
1394

    
1395
  def _Error(self, ecode, item, msg, *args, **kwargs):
1396
    """Format an error message.
1397

1398
    Based on the opcode's error_codes parameter, either format a
1399
    parseable error code, or a simpler error string.
1400

1401
    This must be called only from Exec and functions called from Exec.
1402

1403
    """
1404
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1405
    itype, etxt = ecode
1406
    # first complete the msg
1407
    if args:
1408
      msg = msg % args
1409
    # then format the whole message
1410
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1411
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1412
    else:
1413
      if item:
1414
        item = " " + item
1415
      else:
1416
        item = ""
1417
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1418
    # and finally report it via the feedback_fn
1419
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1420

    
1421
  def _ErrorIf(self, cond, *args, **kwargs):
1422
    """Log an error message if the passed condition is True.
1423

1424
    """
1425
    cond = (bool(cond)
1426
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1427
    if cond:
1428
      self._Error(*args, **kwargs)
1429
    # do not mark the operation as failed if the problem is only a warning
1430
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1431
      self.bad = self.bad or cond
1432

    
1433

    
1434
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1435
  """Verifies the cluster config.
1436

1437
  """
1438
  REQ_BGL = True
1439

    
1440
  def _VerifyHVP(self, hvp_data):
1441
    """Verifies locally the syntax of the hypervisor parameters.
1442

1443
    """
1444
    for item, hv_name, hv_params in hvp_data:
1445
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1446
             (item, hv_name))
1447
      try:
1448
        hv_class = hypervisor.GetHypervisor(hv_name)
1449
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1450
        hv_class.CheckParameterSyntax(hv_params)
1451
      except errors.GenericError, err:
1452
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1453

    
1454
  def ExpandNames(self):
1455
    # Information can be safely retrieved as the BGL is acquired in exclusive
1456
    # mode
1457
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1458
    self.all_node_info = self.cfg.GetAllNodesInfo()
1459
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1460
    self.needed_locks = {}
1461

    
1462
  def Exec(self, feedback_fn):
1463
    """Verify integrity of cluster, performing various test on nodes.
1464

1465
    """
1466
    self.bad = False
1467
    self._feedback_fn = feedback_fn
1468

    
1469
    feedback_fn("* Verifying cluster config")
1470

    
1471
    for msg in self.cfg.VerifyConfig():
1472
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1473

    
1474
    feedback_fn("* Verifying cluster certificate files")
1475

    
1476
    for cert_filename in constants.ALL_CERT_FILES:
1477
      (errcode, msg) = _VerifyCertificate(cert_filename)
1478
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1479

    
1480
    feedback_fn("* Verifying hypervisor parameters")
1481

    
1482
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1483
                                                self.all_inst_info.values()))
1484

    
1485
    feedback_fn("* Verifying all nodes belong to an existing group")
1486

    
1487
    # We do this verification here because, should this bogus circumstance
1488
    # occur, it would never be caught by VerifyGroup, which only acts on
1489
    # nodes/instances reachable from existing node groups.
1490

    
1491
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1492
                         if node.group not in self.all_group_info)
1493

    
1494
    dangling_instances = {}
1495
    no_node_instances = []
1496

    
1497
    for inst in self.all_inst_info.values():
1498
      if inst.primary_node in dangling_nodes:
1499
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1500
      elif inst.primary_node not in self.all_node_info:
1501
        no_node_instances.append(inst.name)
1502

    
1503
    pretty_dangling = [
1504
        "%s (%s)" %
1505
        (node.name,
1506
         utils.CommaJoin(dangling_instances.get(node.name,
1507
                                                ["no instances"])))
1508
        for node in dangling_nodes]
1509

    
1510
    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1511
                  "the following nodes (and their instances) belong to a non"
1512
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1513

    
1514
    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1515
                  "the following instances have a non-existing primary-node:"
1516
                  " %s", utils.CommaJoin(no_node_instances))
1517

    
1518
    return (not self.bad, [g.name for g in self.all_group_info.values()])
1519

    
1520

    
1521
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1522
  """Verifies the status of a node group.
1523

1524
  """
1525
  HPATH = "cluster-verify"
1526
  HTYPE = constants.HTYPE_CLUSTER
1527
  REQ_BGL = False
1528

    
1529
  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()
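    # All locks are requested in shared mode; the verification below only
    # reads cluster state.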

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1661
                                 utils.CommaJoin(unlocked_lv_nodes))
1662
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

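    # ntime_diff is only set when the node clock falls outside the
    # [start, end] window of the verify RPC by more than NODE_MAX_CLOCK_SKEW.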
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]
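    # diskdata flattens diskstatus into (node, success, status, disk index)
    # tuples, one entry per disk per node.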

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to, should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
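        # The node passes the N+1 check for prinode only if its free memory
        # can absorb every auto-balanced instance whose primary is prinode.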
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_all_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    node_names = frozenset(node.name for node in nodeinfo)

    assert master_node in node_names
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
           "Found file listed in more than one file list"

    # Define functions determining which nodes to consider for a file
    file2nodefn = dict([(filename, fn)
      for (files, fn) in [(files_all, None),
                          (files_all_opt, None),
                          (files_mc, lambda node: (node.master_candidate or
                                                   node.name == master_node)),
                          (files_vm, lambda node: node.vm_capable)]
      for filename in files])
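    # file2nodefn maps each filename to a predicate selecting the nodes that
    # must have the file; None means the file is checked on all nodes.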

    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())

    for node in nodeinfo:
      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        continue

      for (filename, checksum) in node_files.items():
        # Check if the file should be considered for a node
        fn = file2nodefn[filename]
        if fn is None or fn(node):
          fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes)

      # Nodes missing file
      missing_file = node_names - with_file

      if filename in files_all_opt:
        # All or no nodes
        errorif(missing_file and missing_file != node_names,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.
2065

2066
    @type ninfo: L{objects.Node}
2067
    @param ninfo: the node to check
2068
    @param nresult: the remote results for the node
2069
    @param instanceinfo: the dict of instances
2070
    @param drbd_helper: the configured DRBD usermode helper
2071
    @param drbd_map: the DRBD map as returned by
2072
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2073

2074
    """
2075
    node = ninfo.name
2076
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2077

    
2078
    if drbd_helper:
2079
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2080
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)
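    # node_drbd now maps each configured minor on this node to
    # (instance name, whether that minor is expected to be in use)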

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

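    # Consistency checks: every per-node status list has one entry per
    # instance disk, results only come from nodes the instance actually uses,
    # and each entry is a (success, payload) pair.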
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; when they fail, their
    output is logged in the verify output and the verification fails.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable-msg=R0914

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    # We will make nodes contact all nodes in their group, and one node from
    # every other group.
    # TODO: should it be a *random* node, different every time?
    online_nodes = [node.name for node in node_data_list if not node.offline]
    other_group_nodes = {}

    for name in sorted(self.all_node_info):
      node = self.all_node_info[name]
      if (node.group not in other_group_nodes
          and node.group != self.group_uuid
          and not node.offline):
        other_group_nodes[node.group] = node.name

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }
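    # node_verify_param is the work order sent to every node through the
    # node_verify RPC; more NV_* entries are added below depending on the
    # cluster configuration (LVM, DRBD, bridges, OOB).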

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

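      # Instances may reference nodes outside this group, or even nodes that
      # are not in the configuration at all; give those a minimal NodeImage
      # and mark truly unknown ones as ghosts.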
      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_up and pnode_img.offline,
               self.EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
2831
    @param lu_result: previous Exec result
2832
    @return: the new Exec result, based on the previous result
2833
        and hook results
2834

2835
    """
2836
    # We only really run POST phase hooks, only for non-empty groups,
2837
    # and are only interested in their results
2838
    if not self.my_node_names:
2839
      # empty node group
2840
      pass
2841
    elif phase == constants.HOOKS_PHASE_POST:
2842
      # Used to change hooks' output to proper indentation
2843
      feedback_fn("* Hooks Results")
2844
      assert hooks_results, "invalid result from hooks"
2845

    
2846
      for node_name in hooks_results:
2847
        res = hooks_results[node_name]
2848
        msg = res.fail_msg
2849
        test = msg and not res.offline
2850
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2851
                      "Communication failure in hooks execution: %s", msg)
2852
        if res.offline or msg:
2853
          # No need to investigate payload if node is offline or gave an error.
2854
          # override manually lu_result here as _ErrorIf only
2855
          # overrides self.bad
2856
          lu_result = 1
2857
          continue
2858
        for script, hkr, output in res.payload:
2859
          test = hkr == constants.HKR_FAIL
2860
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2861
                        "Script %s failed, output:", script)
2862
          if test:
2863
            output = self._HOOKS_INDENT_RE.sub("      ", output)
2864
            feedback_fn("%s" % output)
2865
            lu_result = 0
2866

    
2867
    return lu_result
2868

    
2869

    
2870
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.glm.list_owned(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])
2888

    
2889

    
2890
class LUGroupVerifyDisks(NoHooksLU):
2891
  """Verifies the status of all disks in a node group.
2892

2893
  """
2894
  REQ_BGL = False
2895

    
2896
  def ExpandNames(self):
2897
    # Raises errors.OpPrereqError on its own if group can't be found
2898
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2899

    
2900
    self.share_locks = _ShareAll()
2901
    self.needed_locks = {
2902
      locking.LEVEL_INSTANCE: [],
2903
      locking.LEVEL_NODEGROUP: [],
2904
      locking.LEVEL_NODE: [],
2905
      }
2906

    
2907
  def DeclareLocks(self, level):
2908
    if level == locking.LEVEL_INSTANCE:
2909
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
2910

    
2911
      # Lock instances optimistically, needs verification once node and group
2912
      # locks have been acquired
2913
      self.needed_locks[locking.LEVEL_INSTANCE] = \
2914
        self.cfg.GetNodeGroupInstances(self.group_uuid)
2915

    
2916
    elif level == locking.LEVEL_NODEGROUP:
2917
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
2918

    
2919
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
2920
        set([self.group_uuid] +
2921
            # Lock all groups used by instances optimistically; this requires
2922
            # going via the node before it's locked, requiring verification
2923
            # later on
2924
            [group_uuid
2925
             for instance_name in
2926
               self.glm.list_owned(locking.LEVEL_INSTANCE)
2927
             for group_uuid in
2928
               self.cfg.GetInstanceNodeGroups(instance_name)])
2929

    
2930
    elif level == locking.LEVEL_NODE:
2931
      # This will only lock the nodes in the group to be verified which contain
2932
      # actual instances
2933
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
2934
      self._LockInstancesNodes()
2935

    
2936
      # Lock all nodes in group to be verified
2937
      assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
2938
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
2939
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
2940

    
2941
  def CheckPrereq(self):
2942
    owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
2943
    owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
2944
    owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))
2945

    
2946
    assert self.group_uuid in owned_groups
2947

    
2948
    # Check if locked instances are still correct
2949
    wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2950
    if owned_instances != wanted_instances:
2951
      raise errors.OpPrereqError("Instances in node group %s changed since"
2952
                                 " locks were acquired, wanted %s, have %s;"
2953
                                 " retry the operation" %
2954
                                 (self.op.group_name,
2955
                                  utils.CommaJoin(wanted_instances),
2956
                                  utils.CommaJoin(owned_instances)),
2957
                                 errors.ECODE_STATE)
2958

    
2959
    # Get instance information
2960
    self.instances = dict((name, self.cfg.GetInstanceInfo(name))
2961
                          for name in owned_instances)
2962

    
2963
    # Check if node groups for locked instances are still correct
2964
    for (instance_name, inst) in self.instances.items():
2965
      assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
2966
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
2967
      assert owned_nodes.issuperset(inst.all_nodes), \
2968
        "Instance %s's nodes changed while we kept the lock" % instance_name
2969

    
2970
      inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
2971
      if not owned_groups.issuperset(inst_groups):
        raise errors.OpPrereqError("Instance %s's node groups changed since"
                                   " locks were acquired, current groups are"
                                   " '%s', owning groups '%s'; retry the"
                                   " operation" %
                                   (instance_name,
                                    utils.CommaJoin(inst_groups),
                                    utils.CommaJoin(owned_groups)),
                                   errors.ECODE_STATE)
2980

    
2981
  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
2990
    res_nodes = {}
2991
    res_instances = set()
2992
    res_missing = {}
2993

    
2994
    nv_dict = _MapInstanceDisksToNodes([inst
2995
                                        for inst in self.instances.values()
2996
                                        if inst.admin_up])
2997

    
2998
    if nv_dict:
2999
      nodes = utils.NiceSort(set(self.glm.list_owned(locking.LEVEL_NODE)) &
3000
                             set(self.cfg.GetVmCapableNodeList()))
3001

    
3002
      node_lvs = self.rpc.call_lv_list(nodes, [])
3003

    
3004
      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)
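          # an LV that is reported as offline but still maps to a known
          # instance means that instance's disks are not fully active; it
          # is returned in the "needs activate-disks" list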
3018

    
3019
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3020
      # better
3021
      for key, inst in nv_dict.iteritems():
3022
        res_missing.setdefault(inst, []).append(key)
3023

    
3024
    return (res_nodes, list(res_instances), res_missing)
3025

    
3026

    
3027
class LUClusterRepairDiskSizes(NoHooksLU):
3028
  """Verifies the cluster disks sizes.
3029

3030
  """
3031
  REQ_BGL = False
3032

    
3033
  def ExpandNames(self):
3034
    if self.op.instances:
3035
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3036
      self.needed_locks = {
3037
        locking.LEVEL_NODE: [],
3038
        locking.LEVEL_INSTANCE: self.wanted_names,
3039
        }
3040
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3041
    else:
3042
      self.wanted_names = None
3043
      self.needed_locks = {
3044
        locking.LEVEL_NODE: locking.ALL_SET,
3045
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3046
        }
3047
    self.share_locks = _ShareAll()
3048

    
3049
  def DeclareLocks(self, level):
3050
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3051
      self._LockInstancesNodes(primary_only=True)
3052

    
3053
  def CheckPrereq(self):
3054
    """Check prerequisites.
3055

3056
    This only checks the optional instance list against the existing names.
3057

3058
    """
3059
    if self.wanted_names is None:
3060
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
3061

    
3062
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
3063
                             in self.wanted_names]
3064

    
3065
  def _EnsureChildSizes(self, disk):
3066
    """Ensure children of the disk have the needed disk size.
3067

3068
    This is valid mainly for DRBD8 and fixes an issue where the
3069
    children have smaller disk size.
3070

3071
    @param disk: an L{ganeti.objects.Disk} object
3072

3073
    """
3074
    if disk.dev_type == constants.LD_DRBD8:
3075
      assert disk.children, "Empty children for DRBD8?"
3076
      fchild = disk.children[0]
3077
      mismatch = fchild.size < disk.size
3078
      if mismatch:
3079
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3080
                     fchild.size, disk.size)
3081
        fchild.size = disk.size
3082

    
3083
      # and we recurse on this child only, not on the metadev
3084
      return self._EnsureChildSizes(fchild) or mismatch
3085
    else:
3086
      return False
3087

    
3088
  def Exec(self, feedback_fn):
3089
    """Verify the size of cluster disks.
3090

3091
    """
3092
    # TODO: check child disks too
3093
    # TODO: check differences in size between primary/secondary nodes
3094
    per_node_disks = {}
3095
    for instance in self.wanted_instances:
3096
      pnode = instance.primary_node
3097
      if pnode not in per_node_disks:
3098
        per_node_disks[pnode] = []
3099
      for idx, disk in enumerate(instance.disks):
3100
        per_node_disks[pnode].append((instance, idx, disk))
3101

    
3102
    changed = []
3103
    for node, dskl in per_node_disks.items():
3104
      newl = [v[2].Copy() for v in dskl]
3105
      for dsk in newl:
3106
        self.cfg.SetDiskID(dsk, node)
3107
      result = self.rpc.call_blockdev_getsize(node, newl)
3108
      if result.fail_msg:
3109
        self.LogWarning("Failure in blockdev_getsize call to node"
3110
                        " %s, ignoring", node)
3111
        continue
3112
      if len(result.payload) != len(dskl):
3113
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3114
                        " result.payload=%s", node, len(dskl), result.payload)
3115
        self.LogWarning("Invalid result from node %s, ignoring node results",
3116
                        node)
3117
        continue
3118
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3119
        if size is None:
3120
          self.LogWarning("Disk %d of instance %s did not return size"
3121
                          " information, ignoring", idx, instance.name)
3122
          continue
3123
        if not isinstance(size, (int, long)):
3124
          self.LogWarning("Disk %d of instance %s did not return valid"
3125
                          " size information, ignoring", idx, instance.name)
3126
          continue
3127
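        # the payload size is in bytes; shifting by 20 bits converts it to
        # MiB, the unit used for disk.size in the configuration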
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
3138
    return changed
3139

    
3140

    
3141
class LUClusterRename(LogicalUnit):
3142
  """Rename the cluster.
3143

3144
  """
3145
  HPATH = "cluster-rename"
3146
  HTYPE = constants.HTYPE_CLUSTER
3147

    
3148
  def BuildHooksEnv(self):
3149
    """Build hooks env.
3150

3151
    """
3152
    return {
3153
      "OP_TARGET": self.cfg.GetClusterName(),
3154
      "NEW_NAME": self.op.name,
3155
      }
3156

    
3157
  def BuildHooksNodes(self):
3158
    """Build hooks nodes.
3159

3160
    """
3161
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3162

    
3163
  def CheckPrereq(self):
3164
    """Verify that the passed name is a valid one.
3165

3166
    """
3167
    hostname = netutils.GetHostname(name=self.op.name,
3168
                                    family=self.cfg.GetPrimaryIPFamily())
3169

    
3170
    new_name = hostname.name
3171
    self.ip = new_ip = hostname.ip
3172
    old_name = self.cfg.GetClusterName()
3173
    old_ip = self.cfg.GetMasterIP()
3174
    if new_name == old_name and new_ip == old_ip:
3175
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3176
                                 " cluster has changed",
3177
                                 errors.ECODE_INVAL)
3178
    if new_ip != old_ip:
3179
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3180
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3181
                                   " reachable on the network" %
3182
                                   new_ip, errors.ECODE_NOTUNIQUE)
3183

    
3184
    self.op.name = new_name
3185

    
3186
  def Exec(self, feedback_fn):
3187
    """Rename the cluster.
3188

3189
    """
3190
    clustername = self.op.name
3191
    ip = self.ip
3192

    
3193
    # shutdown the master IP
3194
    master = self.cfg.GetMasterNode()
3195
    result = self.rpc.call_node_stop_master(master, False)
3196
    result.Raise("Could not disable the master role")
3197

    
3198
    try:
3199
      cluster = self.cfg.GetClusterInfo()
3200
      cluster.cluster_name = clustername
3201
      cluster.master_ip = ip
3202
      self.cfg.Update(cluster, feedback_fn)
3203

    
3204
      # update the known hosts file
3205
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3206
      node_list = self.cfg.GetOnlineNodeList()
3207
      try:
3208
        node_list.remove(master)
3209
      except ValueError:
3210
        pass
3211
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3212
    finally:
3213
      result = self.rpc.call_node_start_master(master, False, False)
3214
      msg = result.fail_msg
3215
      if msg:
3216
        self.LogWarning("Could not re-enable the master role on"
3217
                        " the master, please restart manually: %s", msg)
3218

    
3219
    return clustername
3220

    
3221

    
3222
class LUClusterSetParams(LogicalUnit):
3223
  """Change the parameters of the cluster.
3224

3225
  """
3226
  HPATH = "cluster-modify"
3227
  HTYPE = constants.HTYPE_CLUSTER
3228
  REQ_BGL = False
3229

    
3230
  def CheckArguments(self):
3231
    """Check parameters
3232

3233
    """
3234
    if self.op.uid_pool:
3235
      uidpool.CheckUidPool(self.op.uid_pool)
3236

    
3237
    if self.op.add_uids:
3238
      uidpool.CheckUidPool(self.op.add_uids)
3239

    
3240
    if self.op.remove_uids:
3241
      uidpool.CheckUidPool(self.op.remove_uids)
3242

    
3243
  def ExpandNames(self):
3244
    # FIXME: in the future maybe other cluster params won't require checking on
3245
    # all nodes to be modified.
3246
    self.needed_locks = {
3247
      locking.LEVEL_NODE: locking.ALL_SET,
3248
    }
3249
    self.share_locks[locking.LEVEL_NODE] = 1
3250

    
3251
  def BuildHooksEnv(self):
3252
    """Build hooks env.
3253

3254
    """
3255
    return {
3256
      "OP_TARGET": self.cfg.GetClusterName(),
3257
      "NEW_VG_NAME": self.op.vg_name,
3258
      }
3259

    
3260
  def BuildHooksNodes(self):
3261
    """Build hooks nodes.
3262

3263
    """
3264
    mn = self.cfg.GetMasterNode()
3265
    return ([mn], [mn])
3266

    
3267
  def CheckPrereq(self):
3268
    """Check prerequisites.
3269

3270
    This checks whether the given params don't conflict and
3271
    if the given volume group is valid.
3272

3273
    """
3274
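    # an empty (but not None) vg_name means "disable LVM storage"; this and
    # the analogous drbd_helper check below only allow it when no disks of
    # the respective type exist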
    if self.op.vg_name is not None and not self.op.vg_name:
3275
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3276
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3277
                                   " instances exist", errors.ECODE_INVAL)
3278

    
3279
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3280
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3281
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3282
                                   " drbd-based instances exist",
3283
                                   errors.ECODE_INVAL)
3284

    
3285
    node_list = self.glm.list_owned(locking.LEVEL_NODE)
3286

    
3287
    # if vg_name not None, checks given volume group on all nodes
3288
    if self.op.vg_name:
3289
      vglist = self.rpc.call_vg_list(node_list)
3290
      for node in node_list:
3291
        msg = vglist[node].fail_msg
3292
        if msg:
3293
          # ignoring down node
3294
          self.LogWarning("Error while gathering data on node %s"
3295
                          " (ignoring node): %s", node, msg)
3296
          continue
3297
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3298
                                              self.op.vg_name,
3299
                                              constants.MIN_VG_SIZE)
3300
        if vgstatus:
3301
          raise errors.OpPrereqError("Error on node '%s': %s" %
3302
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3303

    
3304
    if self.op.drbd_helper:
3305
      # checks given drbd helper on all nodes
3306
      helpers = self.rpc.call_drbd_helper(node_list)
3307
      for node in node_list:
3308
        ninfo = self.cfg.GetNodeInfo(node)
3309
        if ninfo.offline:
3310
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3311
          continue
3312
        msg = helpers[node].fail_msg
3313
        if msg:
3314
          raise errors.OpPrereqError("Error checking drbd helper on node"
3315
                                     " '%s': %s" % (node, msg),
3316
                                     errors.ECODE_ENVIRON)
3317
        node_helper = helpers[node].payload
3318
        if node_helper != self.op.drbd_helper:
3319
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3320
                                     (node, node_helper), errors.ECODE_ENVIRON)
3321

    
3322
    self.cluster = cluster = self.cfg.GetClusterInfo()
3323
    # validate params changes
3324
    if self.op.beparams:
3325
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3326
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3327

    
3328
    if self.op.ndparams:
3329
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3330
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3331

    
3332
      # TODO: we need a more general way to handle resetting
3333
      # cluster-level parameters to default values
3334
      if self.new_ndparams["oob_program"] == "":
3335
        self.new_ndparams["oob_program"] = \
3336
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3337

    
3338
    if self.op.nicparams:
3339
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3340
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3341
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3342
      nic_errors = []
3343

    
3344
      # check all instances for consistency
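      # (each NIC's own parameters are overlaid on the proposed cluster
      # defaults and re-validated, so a cluster-level change cannot leave an
      # instance with an invalid effective NIC configuration)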
3345
      for instance in self.cfg.GetAllInstancesInfo().values():
3346
        for nic_idx, nic in enumerate(instance.nics):
3347
          params_copy = copy.deepcopy(nic.nicparams)
3348
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3349

    
3350
          # check parameter syntax
3351
          try:
3352
            objects.NIC.CheckParameterSyntax(params_filled)
3353
          except errors.ConfigurationError, err:
3354
            nic_errors.append("Instance %s, nic/%d: %s" %
3355
                              (instance.name, nic_idx, err))
3356

    
3357
          # if we're moving instances to routed, check that they have an ip
3358
          target_mode = params_filled[constants.NIC_MODE]
3359
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3360
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3361
                              " address" % (instance.name, nic_idx))
3362
      if nic_errors:
3363
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3364
                                   "\n".join(nic_errors))
3365

    
3366
    # hypervisor list/parameters
3367
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3368
    if self.op.hvparams:
3369
      for hv_name, hv_dict in self.op.hvparams.items():
3370
        if hv_name not in self.new_hvparams:
3371
          self.new_hvparams[hv_name] = hv_dict
3372
        else:
3373
          self.new_hvparams[hv_name].update(hv_dict)
3374

    
3375
    # os hypervisor parameters
3376
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3377
    if self.op.os_hvp:
3378
      for os_name, hvs in self.op.os_hvp.items():
3379
        if os_name not in self.new_os_hvp:
3380
          self.new_os_hvp[os_name] = hvs
3381
        else:
3382
          for hv_name, hv_dict in hvs.items():
3383
            if hv_name not in self.new_os_hvp[os_name]:
3384
              self.new_os_hvp[os_name][hv_name] = hv_dict
3385
            else:
3386
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3387

    
3388
    # os parameters
3389
    self.new_osp = objects.FillDict(cluster.osparams, {})
3390
    if self.op.osparams:
3391
      for os_name, osp in self.op.osparams.items():
3392
        if os_name not in self.new_osp:
3393
          self.new_osp[os_name] = {}
3394

    
3395
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3396
                                                  use_none=True)
3397

    
3398
        if not self.new_osp[os_name]:
3399
          # we removed all parameters
3400
          del self.new_osp[os_name]
3401
        else:
3402
          # check the parameter validity (remote check)
3403
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3404
                         os_name, self.new_osp[os_name])
3405

    
3406
    # changes to the hypervisor list
3407
    if self.op.enabled_hypervisors is not None:
3408
      self.hv_list = self.op.enabled_hypervisors
3409
      for hv in self.hv_list:
3410
        # if the hypervisor doesn't already exist in the cluster
3411
        # hvparams, we initialize it to empty, and then (in both
3412
        # cases) we make sure to fill the defaults, as we might not
3413
        # have a complete defaults list if the hypervisor wasn't
3414
        # enabled before
3415
        if hv not in new_hvp:
3416
          new_hvp[hv] = {}
3417
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3418
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3419
    else:
3420
      self.hv_list = cluster.enabled_hypervisors
3421

    
3422
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3423
      # either the enabled list has changed, or the parameters have, validate
3424
      for hv_name, hv_params in self.new_hvparams.items():
3425
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3426
            (self.op.enabled_hypervisors and
3427
             hv_name in self.op.enabled_hypervisors)):
3428
          # either this is a new hypervisor, or its parameters have changed
3429
          hv_class = hypervisor.GetHypervisor(hv_name)
3430
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3431
          hv_class.CheckParameterSyntax(hv_params)
3432
          _CheckHVParams(self, node_list, hv_name, hv_params)
3433

    
3434
    if self.op.os_hvp:
3435
      # no need to check any newly-enabled hypervisors, since the
3436
      # defaults have already been checked in the above code-block
3437
      for os_name, os_hvp in self.new_os_hvp.items():
3438
        for hv_name, hv_params in os_hvp.items():
3439
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3440
          # we need to fill in the new os_hvp on top of the actual hv_p
3441
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3442
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3443
          hv_class = hypervisor.GetHypervisor(hv_name)
3444
          hv_class.CheckParameterSyntax(new_osp)
3445
          _CheckHVParams(self, node_list, hv_name, new_osp)
3446

    
3447
    if self.op.default_iallocator:
3448
      alloc_script = utils.FindFile(self.op.default_iallocator,
3449
                                    constants.IALLOCATOR_SEARCH_PATH,
3450
                                    os.path.isfile)
3451
      if alloc_script is None:
3452
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3453
                                   " specified" % self.op.default_iallocator,
3454
                                   errors.ECODE_INVAL)
3455

    
3456
  def Exec(self, feedback_fn):
3457
    """Change the parameters of the cluster.
3458

3459
    """
3460
    if self.op.vg_name is not None:
3461
      new_volume = self.op.vg_name
3462
      if not new_volume:
3463
        new_volume = None
3464
      if new_volume != self.cfg.GetVGName():
3465
        self.cfg.SetVGName(new_volume)
3466
      else:
3467
        feedback_fn("Cluster LVM configuration already in desired"
3468
                    " state, not changing")
3469
    if self.op.drbd_helper is not None:
3470
      new_helper = self.op.drbd_helper
3471
      if not new_helper:
3472
        new_helper = None
3473
      if new_helper != self.cfg.GetDRBDHelper():
3474
        self.cfg.SetDRBDHelper(new_helper)
3475
      else:
3476
        feedback_fn("Cluster DRBD helper already in desired state,"
3477
                    " not changing")
3478
    if self.op.hvparams:
3479
      self.cluster.hvparams = self.new_hvparams
3480
    if self.op.os_hvp:
3481
      self.cluster.os_hvp = self.new_os_hvp
3482
    if self.op.enabled_hypervisors is not None:
3483
      self.cluster.hvparams = self.new_hvparams
3484
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3485
    if self.op.beparams:
3486
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3487
    if self.op.nicparams:
3488
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3489
    if self.op.osparams:
3490
      self.cluster.osparams = self.new_osp
3491
    if self.op.ndparams:
3492
      self.cluster.ndparams = self.new_ndparams
3493

    
3494
    if self.op.candidate_pool_size is not None:
3495
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3496
      # we need to update the pool size here, otherwise the save will fail
3497
      _AdjustCandidatePool(self, [])
3498

    
3499
    if self.op.maintain_node_health is not None:
3500
      self.cluster.maintain_node_health = self.op.maintain_node_health
3501

    
3502
    if self.op.prealloc_wipe_disks is not None:
3503
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3504

    
3505
    if self.op.add_uids is not None:
3506
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3507

    
3508
    if self.op.remove_uids is not None:
3509
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3510

    
3511
    if self.op.uid_pool is not None:
3512
      self.cluster.uid_pool = self.op.uid_pool
3513

    
3514
    if self.op.default_iallocator is not None:
3515
      self.cluster.default_iallocator = self.op.default_iallocator
3516

    
3517
    if self.op.reserved_lvs is not None:
3518
      self.cluster.reserved_lvs = self.op.reserved_lvs
3519

    
3520
    def helper_os(aname, mods, desc):
3521
      desc += " OS list"
3522
      lst = getattr(self.cluster, aname)
3523
      for key, val in mods:
3524
        if key == constants.DDM_ADD:
3525
          if val in lst:
3526
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3527
          else:
3528
            lst.append(val)
3529
        elif key == constants.DDM_REMOVE:
3530
          if val in lst:
3531
            lst.remove(val)
3532
          else:
3533
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3534
        else:
3535
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3536

    
3537
    if self.op.hidden_os:
3538
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3539

    
3540
    if self.op.blacklisted_os:
3541
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3542

    
3543
    if self.op.master_netdev:
3544
      master = self.cfg.GetMasterNode()
3545
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3546
                  self.cluster.master_netdev)
3547
      result = self.rpc.call_node_stop_master(master, False)
3548
      result.Raise("Could not disable the master ip")
3549
      feedback_fn("Changing master_netdev from %s to %s" %
3550
                  (self.cluster.master_netdev, self.op.master_netdev))
3551
      self.cluster.master_netdev = self.op.master_netdev
3552

    
3553
    self.cfg.Update(self.cluster, feedback_fn)
3554

    
3555
    if self.op.master_netdev:
3556
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3557
                  self.op.master_netdev)
3558
      result = self.rpc.call_node_start_master(master, False, False)
3559
      if result.fail_msg:
3560
        self.LogWarning("Could not re-enable the master ip on"
3561
                        " the master, please restart manually: %s",
3562
                        result.fail_msg)
3563

    
3564

    
3565
def _UploadHelper(lu, nodes, fname):
3566
  """Helper for uploading a file and showing warnings.
3567

3568
  """
3569
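  # only upload files that exist on this node; some ancillary files (e.g.
  # the optional ones computed in _ComputeAncillaryFiles) may legitimately
  # be absent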
  if os.path.exists(fname):
3570
    result = lu.rpc.call_upload_file(nodes, fname)
3571
    for to_node, to_result in result.items():
3572
      msg = to_result.fail_msg
3573
      if msg:
3574
        msg = ("Copy of file %s to node %s failed: %s" %
3575
               (fname, to_node, msg))
3576
        lu.proc.LogWarning(msg)
3577

    
3578

    
3579
def _ComputeAncillaryFiles(cluster, redist):
3580
  """Compute files external to Ganeti which need to be consistent.
3581

3582
  @type redist: boolean
3583
  @param redist: Whether to include files which need to be redistributed
3584

3585
  """
3586
  # Compute files for all nodes
3587
  files_all = set([
3588
    constants.SSH_KNOWN_HOSTS_FILE,
3589
    constants.CONFD_HMAC_KEY,
3590
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3591
    ])
3592

    
3593
  if not redist:
3594
    files_all.update(constants.ALL_CERT_FILES)
3595
    files_all.update(ssconf.SimpleStore().GetFileList())
3596

    
3597
  if cluster.modify_etc_hosts:
3598
    files_all.add(constants.ETC_HOSTS)
3599

    
3600
  # Files which must either exist on all nodes or on none
3601
  files_all_opt = set([
3602
    constants.RAPI_USERS_FILE,
3603
    ])
3604

    
3605
  # Files which should only be on master candidates
3606
  files_mc = set()
3607
  if not redist:
3608
    files_mc.add(constants.CLUSTER_CONF_FILE)
3609

    
3610
  # Files which should only be on VM-capable nodes
3611
  files_vm = set(filename
3612
    for hv_name in cluster.enabled_hypervisors
3613
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3614

    
3615
  # Filenames must be unique
3616
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3617
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3618
         "Found file listed in more than one file list"
3619

    
3620
  return (files_all, files_all_opt, files_mc, files_vm)
3621

    
3622

    
3623
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3624
  """Distribute additional files which are part of the cluster configuration.
3625

3626
  ConfigWriter takes care of distributing the config and ssconf files, but
3627
  there are more files which should be distributed to all nodes. This function
3628
  makes sure those are copied.
3629

3630
  @param lu: calling logical unit
3631
  @param additional_nodes: list of nodes not in the config to distribute to
3632
  @type additional_vm: boolean
3633
  @param additional_vm: whether the additional nodes are vm-capable or not
3634

3635
  """
3636
  # Gather target nodes
3637
  cluster = lu.cfg.GetClusterInfo()
3638
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3639

    
3640
  online_nodes = lu.cfg.GetOnlineNodeList()
3641
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3642

    
3643
  if additional_nodes is not None:
3644
    online_nodes.extend(additional_nodes)
3645
    if additional_vm:
3646
      vm_nodes.extend(additional_nodes)
3647

    
3648
  # Never distribute to master node
3649
  for nodelist in [online_nodes, vm_nodes]:
3650
    if master_info.name in nodelist:
3651
      nodelist.remove(master_info.name)
3652

    
3653
  # Gather file lists
3654
  (files_all, files_all_opt, files_mc, files_vm) = \
3655
    _ComputeAncillaryFiles(cluster, True)
3656

    
3657
  # Never re-distribute configuration file from here
3658
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3659
              constants.CLUSTER_CONF_FILE in files_vm)
3660
  assert not files_mc, "Master candidates not handled in this function"
3661

    
3662
  filemap = [
3663
    (online_nodes, files_all),
3664
    (online_nodes, files_all_opt),
3665
    (vm_nodes, files_vm),
3666
    ]
3667

    
3668
  # Upload the files
3669
  for (node_list, files) in filemap:
3670
    for fname in files:
3671
      _UploadHelper(lu, node_list, fname)
3672

    
3673

    
3674
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
3695

    
3696
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3697
  """Sleep and poll for an instance's disk to sync.
3698

3699
  """
3700
  if not instance.disks or disks is not None and not disks:
3701
    return True
3702

    
3703
  disks = _ExpandCheckDisks(instance, disks)
3704

    
3705
  if not oneshot:
3706
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3707

    
3708
  node = instance.primary_node
3709

    
3710
  for dev in disks:
3711
    lu.cfg.SetDiskID(dev, node)
3712

    
3713
  # TODO: Convert to utils.Retry
3714

    
3715
  retries = 0
3716
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3717
  while True:
3718
    max_time = 0
3719
    done = True
3720
    cumul_degraded = False
3721
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3722
    msg = rstats.fail_msg
3723
    if msg:
3724
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3725
      retries += 1
3726
      if retries >= 10:
3727
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3728
                                 " aborting." % node)
3729
      time.sleep(6)
3730
      continue
3731
    rstats = rstats.payload
3732
    retries = 0
3733
    for i, mstat in enumerate(rstats):
3734
      if mstat is None:
3735
        lu.LogWarning("Can't compute data for node %s/%s",
3736
                           node, disks[i].iv_name)
3737
        continue
3738

    
3739
      cumul_degraded = (cumul_degraded or
3740
                        (mstat.is_degraded and mstat.sync_percent is None))
3741
      if mstat.sync_percent is not None:
3742
        done = False
3743
        if mstat.estimated_time is not None:
3744
          rem_time = ("%s remaining (estimated)" %
3745
                      utils.FormatSeconds(mstat.estimated_time))
3746
          max_time = mstat.estimated_time
3747
        else:
3748
          rem_time = "no time estimate"
3749
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3750
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3751

    
3752
    # if we're done but degraded, let's do a few small retries, to
3753
    # make sure we see a stable and not transient situation; therefore
3754
    # we force restart of the loop
3755
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3756
      logging.info("Degraded disks found, %d retries left", degr_retries)
3757
      degr_retries -= 1
3758
      time.sleep(1)
3759
      continue
3760

    
3761
    if done or oneshot:
3762
      break
3763

    
3764
    time.sleep(min(60, max_time))
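    # sleep for the estimated remaining sync time, capped at one minute so
    # that progress is still polled and reported regularly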
3765

    
3766
  if done:
3767
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3768
  return not cumul_degraded
3769

    
3770

    
3771
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3772
  """Check that mirrors are not degraded.
3773

3774
  The ldisk parameter, if True, will change the test from the
3775
  is_degraded attribute (which represents overall non-ok status for
3776
  the device(s)) to the ldisk (representing the local storage status).
3777

3778
  """
3779
  lu.cfg.SetDiskID(dev, node)
3780

    
3781
  result = True
3782

    
3783
  if on_primary or dev.AssembleOnSecondary():
3784
    rstats = lu.rpc.call_blockdev_find(node, dev)
3785
    msg = rstats.fail_msg
3786
    if msg:
3787
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3788
      result = False
3789
    elif not rstats.payload:
3790
      lu.LogWarning("Can't find disk on node %s", node)
3791
      result = False
3792
    else:
3793
      if ldisk:
3794
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3795
      else:
3796
        result = result and not rstats.payload.is_degraded
3797

    
3798
  if dev.children:
3799
    for child in dev.children:
3800
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3801

    
3802
  return result
3803

    
3804

    
3805
class LUOobCommand(NoHooksLU):
3806
  """Logical unit for OOB handling.
3807

3808
  """
3809
  REQ_BGL = False
3810
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3811

    
3812
  def ExpandNames(self):
3813
    """Gather locks we need.
3814

3815
    """
3816
    if self.op.node_names:
3817
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3818
      lock_names = self.op.node_names
3819
    else:
3820
      lock_names = locking.ALL_SET
3821

    
3822
    self.needed_locks = {
3823
      locking.LEVEL_NODE: lock_names,
3824
      }
3825

    
3826
  def CheckPrereq(self):
3827
    """Check prerequisites.
3828

3829
    This checks:
3830
     - the node exists in the configuration
3831
     - OOB is supported
3832

3833
    Any errors are signaled by raising errors.OpPrereqError.
3834

3835
    """
3836
    self.nodes = []
3837
    self.master_node = self.cfg.GetMasterNode()
3838

    
3839
    assert self.op.power_delay >= 0.0
3840

    
3841
    if self.op.node_names:
3842
      if (self.op.command in self._SKIP_MASTER and
3843
          self.master_node in self.op.node_names):
3844
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3845
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3846

    
3847
        if master_oob_handler:
3848
          additional_text = ("run '%s %s %s' if you want to operate on the"
3849
                             " master regardless") % (master_oob_handler,
3850
                                                      self.op.command,
3851
                                                      self.master_node)
3852
        else:
3853
          additional_text = "it does not support out-of-band operations"
3854

    
3855
        raise errors.OpPrereqError(("Operating on the master node %s is not"
3856
                                    " allowed for %s; %s") %
3857
                                   (self.master_node, self.op.command,
3858
                                    additional_text), errors.ECODE_INVAL)
3859
    else:
3860
      self.op.node_names = self.cfg.GetNodeList()
3861
      if self.op.command in self._SKIP_MASTER:
3862
        self.op.node_names.remove(self.master_node)
3863

    
3864
    if self.op.command in self._SKIP_MASTER:
3865
      assert self.master_node not in self.op.node_names
3866

    
3867
    for node_name in self.op.node_names:
3868
      node = self.cfg.GetNodeInfo(node_name)
3869

    
3870
      if node is None:
3871
        raise errors.OpPrereqError("Node %s not found" % node_name,
3872
                                   errors.ECODE_NOENT)
3873
      else:
3874
        self.nodes.append(node)
3875

    
3876
      if (not self.op.ignore_status and
3877
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3878
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3879
                                    " not marked offline") % node_name,
3880
                                   errors.ECODE_STATE)
3881

    
3882
  def Exec(self, feedback_fn):
3883
    """Execute OOB and return result if we expect any.
3884

3885
    """
3886
    master_node = self.master_node
3887
    ret = []
3888

    
3889
    for idx, node in enumerate(utils.NiceSort(self.nodes,
3890
                                              key=lambda node: node.name)):
3891
      node_entry = [(constants.RS_NORMAL, node.name)]
3892
      ret.append(node_entry)
3893

    
3894
      oob_program = _SupportsOob(self.cfg, node)
3895

    
3896
      if not oob_program:
3897
        node_entry.append((constants.RS_UNAVAIL, None))
3898
        continue
3899

    
3900
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3901
                   self.op.command, oob_program, node.name)
3902
      result = self.rpc.call_run_oob(master_node, oob_program,
3903
                                     self.op.command, node.name,
3904
                                     self.op.timeout)
3905

    
3906
      if result.fail_msg:
3907
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3908
                        node.name, result.fail_msg)
3909
        node_entry.append((constants.RS_NODATA, None))
3910
      else:
3911
        try:
3912
          self._CheckPayload(result)
3913
        except errors.OpExecError, err:
3914
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
3915
                          node.name, err)
3916
          node_entry.append((constants.RS_NODATA, None))
3917
        else:
3918
          if self.op.command == constants.OOB_HEALTH:
3919
            # For health we should log important events
3920
            for item, status in result.payload:
3921
              if status in [constants.OOB_STATUS_WARNING,
3922
                            constants.OOB_STATUS_CRITICAL]:
3923
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
3924
                                item, node.name, status)
3925

    
3926
          if self.op.command == constants.OOB_POWER_ON:
3927
            node.powered = True
3928
          elif self.op.command == constants.OOB_POWER_OFF:
3929
            node.powered = False
3930
          elif self.op.command == constants.OOB_POWER_STATUS:
3931
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3932
            if powered != node.powered:
3933
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3934
                               " match actual power state (%s)"), node.powered,
3935
                              node.name, powered)
3936

    
3937
          # For configuration changing commands we should update the node
3938
          if self.op.command in (constants.OOB_POWER_ON,
3939
                                 constants.OOB_POWER_OFF):
3940
            self.cfg.Update(node, feedback_fn)
3941

    
3942
          node_entry.append((constants.RS_NORMAL, result.payload))
3943

    
3944
          if (self.op.command == constants.OOB_POWER_ON and
3945
              idx < len(self.nodes) - 1):
3946
            time.sleep(self.op.power_delay)
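            # pause between consecutive power-ons (skipped after the last
            # node) so that not all nodes are switched back on at once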
3947

    
3948
    return ret
3949

    
3950
  def _CheckPayload(self, result):
3951
    """Checks if the payload is valid.
3952

3953
    @param result: RPC result
3954
    @raises errors.OpExecError: If payload is not valid
3955

3956
    """
3957
    errs = []
3958
    if self.op.command == constants.OOB_HEALTH:
3959
      if not isinstance(result.payload, list):
3960
        errs.append("command 'health' is expected to return a list but got %s" %
3961
                    type(result.payload))
3962
      else:
3963
        for item, status in result.payload:
3964
          if status not in constants.OOB_STATUSES:
3965
            errs.append("health item '%s' has invalid status '%s'" %
3966
                        (item, status))
3967

    
3968
    if self.op.command == constants.OOB_POWER_STATUS:
3969
      if not isinstance(result.payload, dict):
3970
        errs.append("power-status is expected to return a dict but got %s" %
3971
                    type(result.payload))
3972

    
3973
    if self.op.command in [
3974
        constants.OOB_POWER_ON,
3975
        constants.OOB_POWER_OFF,
3976
        constants.OOB_POWER_CYCLE,
3977
        ]:
3978
      if result.payload is not None:
3979
        errs.append("%s is expected to not return payload but got '%s'" %
3980
                    (self.op.command, result.payload))
3981

    
3982
    if errs:
3983
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3984
                               utils.CommaJoin(errs))
3985

    
3986
class _OsQuery(_QueryBase):
3987
  FIELDS = query.OS_FIELDS
3988

    
3989
  def ExpandNames(self, lu):
3990
    # Lock all nodes in shared mode
3991
    # Temporary removal of locks, should be reverted later
3992
    # TODO: reintroduce locks when they are lighter-weight
3993
    lu.needed_locks = {}
3994
    #self.share_locks[locking.LEVEL_NODE] = 1
3995
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3996

    
3997
    # The following variables interact with _QueryBase._GetNames
3998
    if self.names:
3999
      self.wanted = self.names
4000
    else:
4001
      self.wanted = locking.ALL_SET
4002

    
4003
    self.do_locking = self.use_locking
4004

    
4005
  def DeclareLocks(self, lu, level):
4006
    pass
4007

    
4008
  @staticmethod
4009
  def _DiagnoseByOS(rlist):
4010
    """Remaps a per-node return list into an a per-os per-node dictionary
4011

4012
    @param rlist: a map with node names as keys and OS objects as values
4013

4014
    @rtype: dict
4015
    @return: a dictionary with osnames as keys and as value another
4016
        map, with nodes as keys and tuples of (path, status, diagnose,
4017
        variants, parameters, api_versions) as values, eg::
4018

4019
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4020
                                     (/srv/..., False, "invalid api")],
4021
                           "node2": [(/srv/..., True, "", [], [])]}
4022
          }
4023

4024
    """
4025
    all_os = {}
4026
    # we build here the list of nodes that didn't fail the RPC (at RPC
4027
    # level), so that nodes with a non-responding node daemon don't
4028
    # make all OSes invalid
4029
    good_nodes = [node_name for node_name in rlist
4030
                  if not rlist[node_name].fail_msg]
4031
    for node_name, nr in rlist.items():
4032
      if nr.fail_msg or not nr.payload:
4033
        continue
4034
      for (name, path, status, diagnose, variants,
4035
           params, api_versions) in nr.payload:
4036
        if name not in all_os:
4037
          # build a list of nodes for this os containing empty lists
4038
          # for each node in node_list
4039
          all_os[name] = {}
4040
          for nname in good_nodes:
4041
            all_os[name][nname] = []
4042
        # convert params from [name, help] to (name, help)
4043
        params = [tuple(v) for v in params]
4044
        all_os[name][node_name].append((path, status, diagnose,
4045
                                        variants, params, api_versions))
4046
    return all_os
4047

    
4048
  def _GetQueryData(self, lu):
4049
    """Computes the list of nodes and their attributes.
4050

4051
    """
4052
    # Locking is not used
4053
    assert not (compat.any(lu.glm.is_owned(level)
4054
                           for level in locking.LEVELS
4055
                           if level != locking.LEVEL_CLUSTER) or
4056
                self.do_locking or self.use_locking)
4057

    
4058
    valid_nodes = [node.name
4059
                   for node in lu.cfg.GetAllNodesInfo().values()
4060
                   if not node.offline and node.vm_capable]
4061
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4062
    cluster = lu.cfg.GetClusterInfo()
4063

    
4064
    data = {}
4065

    
4066
    for (os_name, os_data) in pol.items():
4067
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4068
                          hidden=(os_name in cluster.hidden_os),
4069
                          blacklisted=(os_name in cluster.blacklisted_os))
4070

    
4071
      variants = set()
4072
      parameters = set()
4073
      api_versions = set()
4074

    
4075
      for idx, osl in enumerate(os_data.values()):
4076
        info.valid = bool(info.valid and osl and osl[0][1])
4077
        if not info.valid:
4078
          break
4079

    
4080
        (node_variants, node_params, node_api) = osl[0][3:6]
4081
        if idx == 0:
4082
          # First entry
4083
          variants.update(node_variants)
4084
          parameters.update(node_params)
4085
          api_versions.update(node_api)
4086
        else:
4087
          # Filter out inconsistent values
4088
          variants.intersection_update(node_variants)
4089
          parameters.intersection_update(node_params)
4090
          api_versions.intersection_update(node_api)
4091

    
4092
      info.variants = list(variants)
4093
      info.parameters = list(parameters)
4094
      info.api_versions = list(api_versions)
4095

    
4096
      data[os_name] = info
4097

    
4098
    # Prepare data in requested order
4099
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4100
            if name in data]
4101

    
4102

    
4103
class LUOsDiagnose(NoHooksLU):
4104
  """Logical unit for OS diagnose/query.
4105

4106
  """
4107
  REQ_BGL = False
4108

    
4109
  @staticmethod
4110
  def _BuildFilter(fields, names):
4111
    """Builds a filter for querying OSes.
4112

4113
    """
4114
    name_filter = qlang.MakeSimpleFilter("name", names)
4115

    
4116
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4117
    # respective field is not requested
4118
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4119
                     for fname in ["hidden", "blacklisted"]
4120
                     if fname not in fields]
4121
    if "valid" not in fields:
4122
      status_filter.append([qlang.OP_TRUE, "valid"])
4123

    
4124
    if status_filter:
4125
      status_filter.insert(0, qlang.OP_AND)
4126
    else:
4127
      status_filter = None
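    # e.g. when none of "hidden", "blacklisted" or "valid" is requested,
    # status_filter becomes:
    #   [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
    #            [OP_NOT, [OP_TRUE, "blacklisted"]], [OP_TRUE, "valid"]]
    # so only visible, non-blacklisted, valid OSes are returned by default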
4128

    
4129
    if name_filter and status_filter:
4130
      return [qlang.OP_AND, name_filter, status_filter]
4131
    elif name_filter:
4132
      return name_filter
4133
    else:
4134
      return status_filter
4135

    
4136
  def CheckArguments(self):
4137
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4138
                       self.op.output_fields, False)
4139

    
4140
  def ExpandNames(self):
4141
    self.oq.ExpandNames(self)
4142

    
4143
  def Exec(self, feedback_fn):
4144
    return self.oq.OldStyleQuery(self)
4145

    
4146

    
4147
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


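# Note on ordering in LUNodeRemove.Exec above: the node is dropped from the
# candidate pool and from the cluster configuration before the
# node_leave_cluster RPC is issued, so a failure of that RPC only results
# in a warning and never leaves the node partially registered.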
class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


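# Locking summary for _NodeQuery above: per-node locks are only taken when
# the caller requested locking and live data (query.NQ_LIVE) is needed; a
# purely static query such as (illustrative)
#   _NodeQuery(qlang.MakeSimpleFilter("name", names), ["name"], False)
# is answered from the configuration without acquiring node locks.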
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
4337
  """Logical unit for getting volumes on node(s).
4338

4339
  """
4340
  REQ_BGL = False
4341
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4342
  _FIELDS_STATIC = utils.FieldSet("node")
4343

    
4344
  def CheckArguments(self):
4345
    _CheckOutputFields(static=self._FIELDS_STATIC,
4346
                       dynamic=self._FIELDS_DYNAMIC,
4347
                       selected=self.op.output_fields)
4348

    
4349
  def ExpandNames(self):
4350
    self.needed_locks = {}
4351
    self.share_locks[locking.LEVEL_NODE] = 1
4352
    if not self.op.nodes:
4353
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4354
    else:
4355
      self.needed_locks[locking.LEVEL_NODE] = \
4356
        _GetWantedNodes(self, self.op.nodes)
4357

    
4358
  def Exec(self, feedback_fn):
4359
    """Computes the list of nodes and their attributes.
4360

4361
    """
4362
    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
4363
    volumes = self.rpc.call_node_volumes(nodenames)
4364

    
4365
    ilist = self.cfg.GetAllInstancesInfo()
4366
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4367

    
4368
    output = []
4369
    for node in nodenames:
4370
      nresult = volumes[node]
4371
      if nresult.offline:
4372
        continue
4373
      msg = nresult.fail_msg
4374
      if msg:
4375
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4376
        continue
4377

    
4378
      node_vols = sorted(nresult.payload,
4379
                         key=operator.itemgetter("dev"))
4380

    
4381
      for vol in node_vols:
4382
        node_output = []
4383
        for field in self.op.output_fields:
4384
          if field == "node":
4385
            val = node
4386
          elif field == "phys":
4387
            val = vol["dev"]
4388
          elif field == "vg":
4389
            val = vol["vg"]
4390
          elif field == "name":
4391
            val = vol["name"]
4392
          elif field == "size":
4393
            val = int(float(vol["size"]))
4394
          elif field == "instance":
4395
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4396
          else:
4397
            raise errors.ParameterError(field)
4398
          node_output.append(str(val))
4399

    
4400
        output.append(node_output)
4401

    
4402
    return output
4403

    
4404

    
4405
class LUNodeQueryStorage(NoHooksLU):
4406
  """Logical unit for getting information on storage units on node(s).
4407

4408
  """
4409
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4410
  REQ_BGL = False
4411

    
4412
  def CheckArguments(self):
4413
    _CheckOutputFields(static=self._FIELDS_STATIC,
4414
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4415
                       selected=self.op.output_fields)
4416

    
4417
  def ExpandNames(self):
4418
    self.needed_locks = {}
4419
    self.share_locks[locking.LEVEL_NODE] = 1
4420

    
4421
    if self.op.nodes:
4422
      self.needed_locks[locking.LEVEL_NODE] = \
4423
        _GetWantedNodes(self, self.op.nodes)
4424
    else:
4425
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4426

    
4427
  def Exec(self, feedback_fn):
4428
    """Computes the list of nodes and their attributes.
4429

4430
    """
4431
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
4432

    
4433
    # Always get name to sort by
4434
    if constants.SF_NAME in self.op.output_fields:
4435
      fields = self.op.output_fields[:]
4436
    else:
4437
      fields = [constants.SF_NAME] + self.op.output_fields
4438

    
4439
    # Never ask for node or type as it's only known to the LU
4440
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4441
      while extra in fields:
4442
        fields.remove(extra)
4443

    
4444
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4445
    name_idx = field_idx[constants.SF_NAME]
4446

    
4447
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4448
    data = self.rpc.call_storage_list(self.nodes,
4449
                                      self.op.storage_type, st_args,
4450
                                      self.op.name, fields)
4451

    
4452
    result = []
4453

    
4454
    for node in utils.NiceSort(self.nodes):
4455
      nresult = data[node]
4456
      if nresult.offline:
4457
        continue
4458

    
4459
      msg = nresult.fail_msg
4460
      if msg:
4461
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4462
        continue
4463

    
4464
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4465

    
4466
      for name in utils.NiceSort(rows.keys()):
4467
        row = rows[name]
4468

    
4469
        out = []
4470

    
4471
        for field in self.op.output_fields:
4472
          if field == constants.SF_NODE:
4473
            val = node
4474
          elif field == constants.SF_TYPE:
4475
            val = self.op.storage_type
4476
          elif field in field_idx:
4477
            val = row[field_idx[field]]
4478
          else:
4479
            raise errors.ParameterError(field)
4480

    
4481
          out.append(val)
4482

    
4483
        result.append(out)
4484

    
4485
    return result
4486

    
4487

    
4488
class _InstanceQuery(_QueryBase):
4489
  FIELDS = query.INSTANCE_FIELDS
4490

    
4491
  def ExpandNames(self, lu):
4492
    lu.needed_locks = {}
4493
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
4494
    lu.share_locks[locking.LEVEL_NODE] = 1
4495

    
4496
    if self.names:
4497
      self.wanted = _GetWantedInstances(lu, self.names)
4498
    else:
4499
      self.wanted = locking.ALL_SET
4500

    
4501
    self.do_locking = (self.use_locking and
4502
                       query.IQ_LIVE in self.requested_data)
4503
    if self.do_locking:
4504
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4505
      lu.needed_locks[locking.LEVEL_NODE] = []
4506
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4507

    
4508
  def DeclareLocks(self, lu, level):
4509
    if level == locking.LEVEL_NODE and self.do_locking:
4510
      lu._LockInstancesNodes() # pylint: disable-msg=W0212
4511

    
4512
  def _GetQueryData(self, lu):
4513
    """Computes the list of instances and their attributes.
4514

4515
    """
4516
    cluster = lu.cfg.GetClusterInfo()
4517
    all_info = lu.cfg.GetAllInstancesInfo()
4518

    
4519
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4520

    
4521
    instance_list = [all_info[name] for name in instance_names]
4522
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4523
                                        for inst in instance_list)))
4524
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4525
    bad_nodes = []
4526
    offline_nodes = []
4527
    wrongnode_inst = set()
4528

    
4529
    # Gather data as requested
4530
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4531
      live_data = {}
4532
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4533
      for name in nodes:
4534
        result = node_data[name]
4535
        if result.offline:
4536
          # offline nodes will be in both lists
4537
          assert result.fail_msg
4538
          offline_nodes.append(name)
4539
        if result.fail_msg:
4540
          bad_nodes.append(name)
4541
        elif result.payload:
4542
          for inst in result.payload:
4543
            if inst in all_info:
4544
              if all_info[inst].primary_node == name:
4545
                live_data.update(result.payload)
4546
              else:
4547
                wrongnode_inst.add(inst)
4548
            else:
4549
              # orphan instance; we don't list it here as we don't
4550
              # handle this case yet in the output of instance listing
4551
              logging.warning("Orphan instance '%s' found on node %s",
4552
                              inst, name)
4553
        # else no instance is alive
4554
    else:
4555
      live_data = {}
4556

    
4557
    if query.IQ_DISKUSAGE in self.requested_data:
4558
      disk_usage = dict((inst.name,
4559
                         _ComputeDiskSize(inst.disk_template,
4560
                                          [{constants.IDISK_SIZE: disk.size}
4561
                                           for disk in inst.disks]))
4562
                        for inst in instance_list)
4563
    else:
4564
      disk_usage = None
4565

    
4566
    if query.IQ_CONSOLE in self.requested_data:
4567
      consinfo = {}
4568
      for inst in instance_list:
4569
        if inst.name in live_data:
4570
          # Instance is running
4571
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4572
        else:
4573
          consinfo[inst.name] = None
4574
      assert set(consinfo.keys()) == set(instance_names)
4575
    else:
4576
      consinfo = None
4577

    
4578
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4579
                                   disk_usage, offline_nodes, bad_nodes,
4580
                                   live_data, wrongnode_inst, consinfo)
4581

    
4582

    
4583
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, False)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query the available fields for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


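# LUQuery returns the actual data through NewStyleQuery, while LUQueryFields
# only returns the field definitions via query.QueryFields; both resolve
# their backend class with _GetQueryImplementation(op.what).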
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage volume on the node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


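# Illustrative example (hypothetical values): with storage_type set to an
# LVM-backed type, a request like changes={"allocatable": False} passes
# CheckArguments above only if "allocatable" is listed in
# constants.MODIFIABLE_STORAGE_FIELDS for that type; any other key makes
# the LU fail with OpPrereqError before touching the node.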
class LUNodeAdd(LogicalUnit):
4665
  """Logical unit for adding node to the cluster.
4666

4667
  """
4668
  HPATH = "node-add"
4669
  HTYPE = constants.HTYPE_NODE
4670
  _NFLAGS = ["master_capable", "vm_capable"]
4671

    
4672
  def CheckArguments(self):
4673
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4674
    # validate/normalize the node name
4675
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4676
                                         family=self.primary_ip_family)
4677
    self.op.node_name = self.hostname.name
4678

    
4679
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4680
      raise errors.OpPrereqError("Cannot readd the master node",
4681
                                 errors.ECODE_STATE)
4682

    
4683
    if self.op.readd and self.op.group:
4684
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4685
                                 " being readded", errors.ECODE_INVAL)
4686

    
4687
  def BuildHooksEnv(self):
4688
    """Build hooks env.
4689

4690
    This will run on all nodes before, and on all nodes + the new node after.
4691

4692
    """
4693
    return {
4694
      "OP_TARGET": self.op.node_name,
4695
      "NODE_NAME": self.op.node_name,
4696
      "NODE_PIP": self.op.primary_ip,
4697
      "NODE_SIP": self.op.secondary_ip,
4698
      "MASTER_CAPABLE": str(self.op.master_capable),
4699
      "VM_CAPABLE": str(self.op.vm_capable),
4700
      }
4701

    
4702
  def BuildHooksNodes(self):
4703
    """Build hooks nodes.
4704

4705
    """
4706
    # Exclude added node
4707
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4708
    post_nodes = pre_nodes + [self.op.node_name, ]
4709

    
4710
    return (pre_nodes, post_nodes)
4711

    
4712
  def CheckPrereq(self):
4713
    """Check prerequisites.
4714

4715
    This checks:
4716
     - the new node is not already in the config
4717
     - it is resolvable
4718
     - its parameters (single/dual homed) matches the cluster
4719

4720
    Any errors are signaled by raising errors.OpPrereqError.
4721

4722
    """
4723
    cfg = self.cfg
4724
    hostname = self.hostname
4725
    node = hostname.name
4726
    primary_ip = self.op.primary_ip = hostname.ip
4727
    if self.op.secondary_ip is None:
4728
      if self.primary_ip_family == netutils.IP6Address.family:
4729
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4730
                                   " IPv4 address must be given as secondary",
4731
                                   errors.ECODE_INVAL)
4732
      self.op.secondary_ip = primary_ip
4733

    
4734
    secondary_ip = self.op.secondary_ip
4735
    if not netutils.IP4Address.IsValid(secondary_ip):
4736
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4737
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4738

    
4739
    node_list = cfg.GetNodeList()
4740
    if not self.op.readd and node in node_list:
4741
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4742
                                 node, errors.ECODE_EXISTS)
4743
    elif self.op.readd and node not in node_list:
4744
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4745
                                 errors.ECODE_NOENT)
4746

    
4747
    self.changed_primary_ip = False
4748

    
4749
    for existing_node_name in node_list:
4750
      existing_node = cfg.GetNodeInfo(existing_node_name)
4751

    
4752
      if self.op.readd and node == existing_node_name:
4753
        if existing_node.secondary_ip != secondary_ip:
4754
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4755
                                     " address configuration as before",
4756
                                     errors.ECODE_INVAL)
4757
        if existing_node.primary_ip != primary_ip:
4758
          self.changed_primary_ip = True
4759

    
4760
        continue
4761

    
4762
      if (existing_node.primary_ip == primary_ip or
4763
          existing_node.secondary_ip == primary_ip or
4764
          existing_node.primary_ip == secondary_ip or
4765
          existing_node.secondary_ip == secondary_ip):
4766
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4767
                                   " existing node %s" % existing_node.name,
4768
                                   errors.ECODE_NOTUNIQUE)
4769

    
4770
    # After this 'if' block, None is no longer a valid value for the
4771
    # _capable op attributes
4772
    if self.op.readd:
4773
      old_node = self.cfg.GetNodeInfo(node)
4774
      assert old_node is not None, "Can't retrieve locked node %s" % node
4775
      for attr in self._NFLAGS:
4776
        if getattr(self.op, attr) is None:
4777
          setattr(self.op, attr, getattr(old_node, attr))
4778
    else:
4779
      for attr in self._NFLAGS:
4780
        if getattr(self.op, attr) is None:
4781
          setattr(self.op, attr, True)
4782

    
4783
    if self.op.readd and not self.op.vm_capable:
4784
      pri, sec = cfg.GetNodeInstances(node)
4785
      if pri or sec:
4786
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4787
                                   " flag set to false, but it already holds"
4788
                                   " instances" % node,
4789
                                   errors.ECODE_STATE)
4790

    
4791
    # check that the type of the node (single versus dual homed) is the
4792
    # same as for the master
4793
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4794
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4795
    newbie_singlehomed = secondary_ip == primary_ip
4796
    if master_singlehomed != newbie_singlehomed:
4797
      if master_singlehomed:
4798
        raise errors.OpPrereqError("The master has no secondary ip but the"
4799
                                   " new node has one",
4800
                                   errors.ECODE_INVAL)
4801
      else:
4802
        raise errors.OpPrereqError("The master has a secondary ip but the"
4803
                                   " new node doesn't have one",
4804
                                   errors.ECODE_INVAL)
4805

    
4806
    # checks reachability
4807
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4808
      raise errors.OpPrereqError("Node not reachable by ping",
4809
                                 errors.ECODE_ENVIRON)
4810

    
4811
    if not newbie_singlehomed:
4812
      # check reachability from my secondary ip to newbie's secondary ip
4813
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4814
                           source=myself.secondary_ip):
4815
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4816
                                   " based ping to node daemon port",
4817
                                   errors.ECODE_ENVIRON)
4818

    
4819
    if self.op.readd:
4820
      exceptions = [node]
4821
    else:
4822
      exceptions = []
4823

    
4824
    if self.op.master_capable:
4825
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4826
    else:
4827
      self.master_candidate = False
4828

    
4829
    if self.op.readd:
4830
      self.new_node = old_node
4831
    else:
4832
      node_group = cfg.LookupNodeGroup(self.op.group)
4833
      self.new_node = objects.Node(name=node,
4834
                                   primary_ip=primary_ip,
4835
                                   secondary_ip=secondary_ip,
4836
                                   master_candidate=self.master_candidate,
4837
                                   offline=False, drained=False,
4838
                                   group=node_group)
4839

    
4840
    if self.op.ndparams:
4841
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4842

    
4843
  def Exec(self, feedback_fn):
4844
    """Adds the new node to the cluster.
4845

4846
    """
4847
    new_node = self.new_node
4848
    node = new_node.name
4849

    
4850
    # We are adding a new node, so we assume it is powered on
4851
    new_node.powered = True
4852

    
4853
    # for re-adds, reset the offline/drained/master-candidate flags;
4854
    # we need to reset here, otherwise offline would prevent RPC calls
4855
    # later in the procedure; this also means that if the re-add
4856
    # fails, we are left with a non-offlined, broken node
4857
    if self.op.readd:
4858
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4859
      self.LogInfo("Readding a node, the offline/drained flags were reset")
4860
      # if we demote the node, we do cleanup later in the procedure
4861
      new_node.master_candidate = self.master_candidate
4862
      if self.changed_primary_ip:
4863
        new_node.primary_ip = self.op.primary_ip
4864

    
4865
    # copy the master/vm_capable flags
4866
    for attr in self._NFLAGS:
4867
      setattr(new_node, attr, getattr(self.op, attr))
4868

    
4869
    # notify the user about any possible mc promotion
4870
    if new_node.master_candidate:
4871
      self.LogInfo("Node will be a master candidate")
4872

    
4873
    if self.op.ndparams:
4874
      new_node.ndparams = self.op.ndparams
4875
    else:
4876
      new_node.ndparams = {}
4877

    
4878
    # check connectivity
4879
    result = self.rpc.call_version([node])[node]
4880
    result.Raise("Can't get version information from node %s" % node)
4881
    if constants.PROTOCOL_VERSION == result.payload:
4882
      logging.info("Communication to node %s fine, sw version %s match",
4883
                   node, result.payload)
4884
    else:
4885
      raise errors.OpExecError("Version mismatch master version %s,"
4886
                               " node version %s" %
4887
                               (constants.PROTOCOL_VERSION, result.payload))
4888

    
4889
    # Add node to our /etc/hosts, and add key to known_hosts
4890
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4891
      master_node = self.cfg.GetMasterNode()
4892
      result = self.rpc.call_etc_hosts_modify(master_node,
4893
                                              constants.ETC_HOSTS_ADD,
4894
                                              self.hostname.name,
4895
                                              self.hostname.ip)
4896
      result.Raise("Can't update hosts file with new host data")
4897

    
4898
    if new_node.secondary_ip != new_node.primary_ip:
4899
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4900
                               False)
4901

    
4902
    node_verify_list = [self.cfg.GetMasterNode()]
4903
    node_verify_param = {
4904
      constants.NV_NODELIST: [node],
4905
      # TODO: do a node-net-test as well?
4906
    }
4907

    
4908
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4909
                                       self.cfg.GetClusterName())
4910
    for verifier in node_verify_list:
4911
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
4912
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
4913
      if nl_payload:
4914
        for failed in nl_payload:
4915
          feedback_fn("ssh/hostname verification failed"
4916
                      " (checking from %s): %s" %
4917
                      (verifier, nl_payload[failed]))
4918
        raise errors.OpExecError("ssh/hostname verification failed")
4919

    
4920
    if self.op.readd:
4921
      _RedistributeAncillaryFiles(self)
4922
      self.context.ReaddNode(new_node)
4923
      # make sure we redistribute the config
4924
      self.cfg.Update(new_node, feedback_fn)
4925
      # and make sure the new node will not have old files around
4926
      if not new_node.master_candidate:
4927
        result = self.rpc.call_node_demote_from_mc(new_node.name)
4928
        msg = result.fail_msg
4929
        if msg:
4930
          self.LogWarning("Node failed to demote itself from master"
4931
                          " candidate status: %s" % msg)
4932
    else:
4933
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
4934
                                  additional_vm=self.op.vm_capable)
4935
      self.context.AddNode(new_node, self.proc.GetECId())
4936

    
4937

    
4938
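# LUNodeAdd.Exec above only accepts a node whose node daemon reports
# exactly constants.PROTOCOL_VERSION; on a mismatch it raises OpExecError
# before the new node is added to (or re-added in) the configuration.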
class LUNodeSetParams(LogicalUnit):
4939
  """Modifies the parameters of a node.
4940

4941
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4942
      to the node role (as _ROLE_*)
4943
  @cvar _R2F: a dictionary from node role to tuples of flags
4944
  @cvar _FLAGS: a list of attribute names corresponding to the flags
4945

4946
  """
4947
  HPATH = "node-modify"
4948
  HTYPE = constants.HTYPE_NODE
4949
  REQ_BGL = False
4950
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4951
  _F2R = {
4952
    (True, False, False): _ROLE_CANDIDATE,
4953
    (False, True, False): _ROLE_DRAINED,
4954
    (False, False, True): _ROLE_OFFLINE,
4955
    (False, False, False): _ROLE_REGULAR,
4956
    }
4957
  _R2F = dict((v, k) for k, v in _F2R.items())
4958
  _FLAGS = ["master_candidate", "drained", "offline"]
4959

    
4960
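  # The _F2R table above maps the (master_candidate, drained, offline) flag
  # tuple to a single role, e.g. (True, False, False) -> _ROLE_CANDIDATE and
  # (False, False, False) -> _ROLE_REGULAR; _R2F is the inverse mapping and
  # _FLAGS names the node attributes in the same tuple order.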
  def CheckArguments(self):
4961
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4962
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4963
                self.op.master_capable, self.op.vm_capable,
4964
                self.op.secondary_ip, self.op.ndparams]
4965
    if all_mods.count(None) == len(all_mods):
4966
      raise errors.OpPrereqError("Please pass at least one modification",
4967
                                 errors.ECODE_INVAL)
4968
    if all_mods.count(True) > 1:
4969
      raise errors.OpPrereqError("Can't set the node into more than one"
4970
                                 " state at the same time",
4971
                                 errors.ECODE_INVAL)
4972

    
4973
    # Boolean value that tells us whether we might be demoting from MC
4974
    self.might_demote = (self.op.master_candidate == False or
4975
                         self.op.offline == True or
4976
                         self.op.drained == True or
4977
                         self.op.master_capable == False)
4978

    
4979
    if self.op.secondary_ip:
4980
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4981
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4982
                                   " address" % self.op.secondary_ip,
4983
                                   errors.ECODE_INVAL)
4984

    
4985
    self.lock_all = self.op.auto_promote and self.might_demote
4986
    self.lock_instances = self.op.secondary_ip is not None
4987

    
4988
  def ExpandNames(self):
4989
    if self.lock_all:
4990
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4991
    else:
4992
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4993

    
4994
    if self.lock_instances:
4995
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4996

    
4997
  def DeclareLocks(self, level):
4998
    # If we have locked all instances, before waiting to lock nodes, release
4999
    # all the ones living on nodes unrelated to the current operation.
5000
    if level == locking.LEVEL_NODE and self.lock_instances:
5001
      self.affected_instances = []
5002
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5003
        instances_keep = []
5004

    
5005
        # Build list of instances to release
5006
        for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
5007
          instance = self.context.cfg.GetInstanceInfo(instance_name)
5008
          if (instance.disk_template in constants.DTS_INT_MIRROR and
5009
              self.op.node_name in instance.all_nodes):
5010
            instances_keep.append(instance_name)
5011
            self.affected_instances.append(instance)
5012

    
5013
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5014

    
5015
        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
5016
                set(instances_keep))
5017

    
5018
  def BuildHooksEnv(self):
5019
    """Build hooks env.
5020

5021
    This runs on the master node.
5022

5023
    """
5024
    return {
5025
      "OP_TARGET": self.op.node_name,
5026
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5027
      "OFFLINE": str(self.op.offline),
5028
      "DRAINED": str(self.op.drained),
5029
      "MASTER_CAPABLE": str(self.op.master_capable),
5030
      "VM_CAPABLE": str(self.op.vm_capable),
5031
      }
5032

    
5033
  def BuildHooksNodes(self):
5034
    """Build hooks nodes.
5035

5036
    """
5037
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5038
    return (nl, nl)
5039

    
5040
  def CheckPrereq(self):
5041
    """Check prerequisites.
5042

5043
    This only checks the instance list against the existing names.
5044

5045
    """
5046
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5047

    
5048
    if (self.op.master_candidate is not None or
5049
        self.op.drained is not None or
5050
        self.op.offline is not None):
5051
      # we can't change the master's node flags
5052
      if self.op.node_name == self.cfg.GetMasterNode():
5053
        raise errors.OpPrereqError("The master role can be changed"
5054
                                   " only via master-failover",
5055
                                   errors.ECODE_INVAL)
5056

    
5057
    if self.op.master_candidate and not node.master_capable:
5058
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5059
                                 " it a master candidate" % node.name,
5060
                                 errors.ECODE_STATE)
5061

    
5062
    if self.op.vm_capable == False:
5063
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5064
      if ipri or isec:
5065
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5066
                                   " the vm_capable flag" % node.name,
5067
                                   errors.ECODE_STATE)
5068

    
5069
    if node.master_candidate and self.might_demote and not self.lock_all:
5070
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5071
      # check if after removing the current node, we're missing master
5072
      # candidates
5073
      (mc_remaining, mc_should, _) = \
5074
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5075
      if mc_remaining < mc_should:
5076
        raise errors.OpPrereqError("Not enough master candidates, please"
5077
                                   " pass auto promote option to allow"
5078
                                   " promotion", errors.ECODE_STATE)
5079

    
5080
    self.old_flags = old_flags = (node.master_candidate,
5081
                                  node.drained, node.offline)
5082
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5083
    self.old_role = old_role = self._F2R[old_flags]
5084

    
5085
    # Check for ineffective changes
5086
    for attr in self._FLAGS:
5087
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5088
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5089
        setattr(self.op, attr, None)
5090

    
5091
    # Past this point, any flag change to False means a transition
5092
    # away from the respective state, as only real changes are kept
5093

    
5094
    # TODO: We might query the real power state if it supports OOB
5095
    if _SupportsOob(self.cfg, node):
5096
      if self.op.offline is False and not (node.powered or
5097
                                           self.op.powered == True):
5098
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5099
                                    " offline status can be reset") %
5100
                                   self.op.node_name)
5101
    elif self.op.powered is not None:
5102
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5103
                                  " as it does not support out-of-band"
5104
                                  " handling") % self.op.node_name)
5105

    
5106
    # If we're being deofflined/drained, we'll MC ourself if needed
5107
    if (self.op.drained == False or self.op.offline == False or
5108
        (self.op.master_capable and not node.master_capable)):
5109
      if _DecideSelfPromotion(self):
5110
        self.op.master_candidate = True
5111
        self.LogInfo("Auto-promoting node to master candidate")
5112

    
5113
    # If we're no longer master capable, we'll demote ourselves from MC
5114
    if self.op.master_capable == False and node.master_candidate:
5115
      self.LogInfo("Demoting from master candidate")
5116
      self.op.master_candidate = False
5117

    
5118
    # Compute new role
5119
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5120
    if self.op.master_candidate:
5121
      new_role = self._ROLE_CANDIDATE
5122
    elif self.op.drained:
5123
      new_role = self._ROLE_DRAINED
5124
    elif self.op.offline:
5125
      new_role = self._ROLE_OFFLINE
5126
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5127
      # False is still in new flags, which means we're un-setting (the
5128
      # only) True flag
5129
      new_role = self._ROLE_REGULAR
5130
    else: # no new flags, nothing, keep old role
5131
      new_role = old_role
5132

    
5133
    self.new_role = new_role
5134

    
5135
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5136
      # Trying to transition out of offline status
5137
      result = self.rpc.call_version([node.name])[node.name]
5138
      if result.fail_msg:
5139
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5140
                                   " to report its version: %s" %
5141
                                   (node.name, result.fail_msg),
5142
                                   errors.ECODE_STATE)
5143
      else:
5144
        self.LogWarning("Transitioning node from offline to online state"
5145
                        " without using re-add. Please make sure the node"
5146
                        " is healthy!")
5147

    
5148
    if self.op.secondary_ip:
5149
      # Ok even without locking, because this can't be changed by any LU
5150
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5151
      master_singlehomed = master.secondary_ip == master.primary_ip
5152
      if master_singlehomed and self.op.secondary_ip:
5153
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5154
                                   " homed cluster", errors.ECODE_INVAL)
5155

    
5156
      if node.offline:
5157
        if self.affected_instances:
5158
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5159
                                     " node has instances (%s) configured"
5160
                                     " to use it" % self.affected_instances)
5161
      else:
5162
        # On online nodes, check that no instances are running, and that
5163
        # the node has the new ip and we can reach it.
5164
        for instance in self.affected_instances:
5165
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5166

    
5167
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5168
        if master.name != node.name:
5169
          # check reachability from master secondary ip to new secondary ip
5170
          if not netutils.TcpPing(self.op.secondary_ip,
5171
                                  constants.DEFAULT_NODED_PORT,
5172
                                  source=master.secondary_ip):
5173
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5174
                                       " based ping to node daemon port",
5175
                                       errors.ECODE_ENVIRON)
5176

    
5177
    if self.op.ndparams:
5178
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5179
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5180
      self.new_ndparams = new_ndparams
5181

    
5182
  def Exec(self, feedback_fn):
5183
    """Modifies a node.
5184

5185
    """
5186
    node = self.node
5187
    old_role = self.old_role
5188
    new_role = self.new_role
5189

    
5190
    result = []
5191

    
5192
    if self.op.ndparams:
5193
      node.ndparams = self.new_ndparams
5194

    
5195
    if self.op.powered is not None:
5196
      node.powered = self.op.powered
5197

    
5198
    for attr in ["master_capable", "vm_capable"]:
5199
      val = getattr(self.op, attr)
5200
      if val is not None:
5201
        setattr(node, attr, val)
5202
        result.append((attr, str(val)))
5203

    
5204
    if new_role != old_role:
5205
      # Tell the node to demote itself, if no longer MC and not offline
5206
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5207
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5208
        if msg:
5209
          self.LogWarning("Node failed to demote itself: %s", msg)
5210

    
5211
      new_flags = self._R2F[new_role]
5212
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5213
        if of != nf:
5214
          result.append((desc, str(nf)))
5215
      (node.master_candidate, node.drained, node.offline) = new_flags
5216

    
5217
      # since we have locked all nodes, we adjust the candidate pool
      # before updating this node
5218
      if self.lock_all:
5219
        _AdjustCandidatePool(self, [node.name])
5220

    
5221
    if self.op.secondary_ip:
5222
      node.secondary_ip = self.op.secondary_ip
5223
      result.append(("secondary_ip", self.op.secondary_ip))
5224

    
5225
    # this will trigger configuration file update, if needed
5226
    self.cfg.Update(node, feedback_fn)
5227

    
5228
    # this will trigger job queue propagation or cleanup if the mc
5229
    # flag changed
5230
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5231
      self.context.ReaddNode(node)
5232

    
5233
    return result
5234

    
5235

    
5236
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
5269
  """Query cluster configuration.
5270

5271
  """
5272
  REQ_BGL = False
5273

    
5274
  def ExpandNames(self):
5275
    self.needed_locks = {}
5276

    
5277
  def Exec(self, feedback_fn):
5278
    """Return cluster config.
5279

5280
    """
5281
    cluster = self.cfg.GetClusterInfo()
5282
    os_hvp = {}
5283

    
5284
    # Filter just for enabled hypervisors
5285
    for os_name, hv_dict in cluster.os_hvp.items():
5286
      os_hvp[os_name] = {}
5287
      for hv_name, hv_params in hv_dict.items():
5288
        if hv_name in cluster.enabled_hypervisors:
5289
          os_hvp[os_name][hv_name] = hv_params
5290

    
5291
    # Convert ip_family to ip_version
5292
    primary_ip_version = constants.IP4_VERSION
5293
    if cluster.primary_ip_family == netutils.IP6Address.family:
5294
      primary_ip_version = constants.IP6_VERSION
5295

    
5296
    result = {
5297
      "software_version": constants.RELEASE_VERSION,
5298
      "protocol_version": constants.PROTOCOL_VERSION,
5299
      "config_version": constants.CONFIG_VERSION,
5300
      "os_api_version": max(constants.OS_API_VERSIONS),
5301
      "export_version": constants.EXPORT_VERSION,
5302
      "architecture": (platform.architecture()[0], platform.machine()),
5303
      "name": cluster.cluster_name,
5304
      "master": cluster.master_node,
5305
      "default_hypervisor": cluster.enabled_hypervisors[0],
5306
      "enabled_hypervisors": cluster.enabled_hypervisors,
5307
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5308
                        for hypervisor_name in cluster.enabled_hypervisors]),
5309
      "os_hvp": os_hvp,
5310
      "beparams": cluster.beparams,
5311
      "osparams": cluster.osparams,
5312
      "nicparams": cluster.nicparams,
5313
      "ndparams": cluster.ndparams,
5314
      "candidate_pool_size": cluster.candidate_pool_size,
5315
      "master_netdev": cluster.master_netdev,
5316
      "volume_group_name": cluster.volume_group_name,
5317
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5318
      "file_storage_dir": cluster.file_storage_dir,
5319
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5320
      "maintain_node_health": cluster.maintain_node_health,
5321
      "ctime": cluster.ctime,
5322
      "mtime": cluster.mtime,
5323
      "uuid": cluster.uuid,
5324
      "tags": list(cluster.GetTags()),
5325
      "uid_pool": cluster.uid_pool,
5326
      "default_iallocator": cluster.default_iallocator,
5327
      "reserved_lvs": cluster.reserved_lvs,
5328
      "primary_ip_version": primary_ip_version,
5329
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5330
      "hidden_os": cluster.hidden_os,
5331
      "blacklisted_os": cluster.blacklisted_os,
5332
      }
5333

    
5334
    return result
5335

    
5336

    
5337
class LUClusterConfigQuery(NoHooksLU):
5338
  """Return configuration values.
5339

5340
  """
5341
  REQ_BGL = False
5342
  _FIELDS_DYNAMIC = utils.FieldSet()
5343
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5344
                                  "watcher_pause", "volume_group_name")
5345

    
5346
  def CheckArguments(self):
5347
    _CheckOutputFields(static=self._FIELDS_STATIC,
5348
                       dynamic=self._FIELDS_DYNAMIC,
5349
                       selected=self.op.output_fields)
5350

    
5351
  def ExpandNames(self):
5352
    self.needed_locks = {}
5353

    
5354
  def Exec(self, feedback_fn):
5355
    """Dump a representation of the cluster config to the standard output.
5356

5357
    """
5358
    values = []
5359
    for field in self.op.output_fields:
5360
      if field == "cluster_name":
5361
        entry = self.cfg.GetClusterName()
5362
      elif field == "master_node":
5363
        entry = self.cfg.GetMasterNode()
5364
      elif field == "drain_flag":
5365
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5366
      elif field == "watcher_pause":
5367
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5368
      elif field == "volume_group_name":
5369
        entry = self.cfg.GetVGName()
5370
      else:
5371
        raise errors.ParameterError(field)
5372
      values.append(entry)
5373
    return values
5374

    
5375

    
5376
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a tuple of (disks_ok, device_info); disks_ok is False if the
      operation failed, and device_info is a list of
      (host, instance_visible_name, node_visible_name) tuples
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


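# Shape of the return value of _AssembleInstanceDisks above (illustrative):
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0"), ...])
# i.e. a success flag plus, per disk, the primary node, the
# instance-visible name and the node-visible device path;
# _StartInstanceDisks below only uses the flag and discards the list.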
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is not running before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


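# Note (illustrative): a typical call of _CheckNodeFreeMemory, as used by
# LUInstanceStartup further down in this module, looks like:
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)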
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


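# Note (illustrative): req_sizes for _CheckNodesFreeDiskPerVG below maps
# volume group names to the space required in MiB; for example (values are
# purely illustrative)
#   {"xenvg": 10240}
# would require 10 GiB free in volume group "xenvg" on every listed node.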
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


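# Note (illustrative, based on the code below): the hvparams/beparams accepted
# by LUInstanceStartup are one-off overrides for this start only; as the
# warning in CheckPrereq shows, they are ignored when the primary node is
# offline and the start is then only recorded in the configuration.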
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams,
                                            self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


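# Note (illustrative, based on the code below): LUInstanceReboot takes three
# paths: for a running instance, soft/hard reboots go through
# call_instance_reboot, while a full reboot (or a stopped instance) is handled
# as shutdown plus disk deactivation followed by a fresh disk assembly and
# start.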
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


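# Note (illustrative, based on the code below): LUInstanceShutdown marks the
# instance down in the configuration first (unless no_remember is set), so
# even a failed RPC shutdown leaves the cluster's desired state as "stopped";
# with ignore_offline_nodes the RPC is skipped entirely.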
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


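# Note (illustrative, based on the code below): LUInstanceReinstall requires
# the instance to be stopped and all its nodes online; an optional os_type and
# osparams override the recorded OS before the create scripts are re-run on
# the primary node.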
class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


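# Note (illustrative, based on the code below): when LUInstanceRecreateDisks
# is given replacement nodes, their count must match the instance's current
# node list (two for DRBD8, one for plain LVM), and partial disk selection
# cannot be combined with a node change.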
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # normalise the disk list
    self.op.disks = sorted(frozenset(self.op.disks))

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    to_skip = []
    mods = [] # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes and minors
          assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6 # otherwise disk internals
                                           # have changed
          (_, _, old_port, _, _, old_secret) = disk.logical_id
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                    new_minors[0], new_minors[1], old_secret)
          assert len(disk.logical_id) == len(new_id)
          mods.append((idx, new_id))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id in mods:
      instance.disks[idx].logical_id = new_id

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)


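# Note (illustrative, based on the code below): LUInstanceRename accepts
# ip_check only together with name_check; with name_check enabled the new name
# is resolved via DNS and the resolved address is probed on the noded port to
# catch addresses that are already in use.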
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname.name != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (hostname.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


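# Note (illustrative, based on the code below): LUInstanceRemove first tries a
# clean shutdown with the opcode's shutdown_timeout; only with ignore_failures
# set does it proceed past a failed shutdown (or failed disk removal) and
# still drop the instance from the configuration via _RemoveInstance.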
class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


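# Note (illustrative, based on the code below): LUNodeMigrate does not migrate
# anything itself; it returns a ResultWithJobs that submits one
# OpInstanceMigrate job per primary instance of the node, so each migration
# runs as a separate job.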
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare jobs for migration instances
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
      ]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)


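# Note (illustrative, based on the code below): TLMigrateInstance backs both
# LUInstanceFailover and LUInstanceMigrate; its CheckPrereq translates the
# opcode's 'live' flag into a migration mode (or takes the hypervisor default)
# and may itself fall back to failover when fallback is allowed.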
class TLMigrateInstance(Tasklet):
6836
  """Tasklet class for instance migration.
6837

6838
  @type live: boolean
6839
  @ivar live: whether the migration will be done live or non-live;
6840
      this variable is initalized only after CheckPrereq has run
6841
  @type cleanup: boolean
6842
  @ivar cleanup: Wheater we cleanup from a failed migration
6843
  @type iallocator: string
6844
  @ivar iallocator: The iallocator used to determine target_node
6845
  @type target_node: string
6846
  @ivar target_node: If given, the target_node to reallocate the instance to
6847
  @type failover: boolean
6848
  @ivar failover: Whether operation results in failover or migration
6849
  @type fallback: boolean
6850
  @ivar fallback: Whether fallback to failover is allowed if migration not
6851
                  possible
6852
  @type ignore_consistency: boolean
6853
  @ivar ignore_consistency: Wheter we should ignore consistency between source
6854
                            and target node
6855
  @type shutdown_timeout: int
6856
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
6857

6858
  """
6859
  def __init__(self, lu, instance_name, cleanup=False,
6860
               failover=False, fallback=False,
6861
               ignore_consistency=False,
6862
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6863
    """Initializes this class.
6864

6865
    """
6866
    Tasklet.__init__(self, lu)
6867

    
6868
    # Parameters
6869
    self.instance_name = instance_name
6870
    self.cleanup = cleanup
6871
    self.live = False # will be overridden later
6872
    self.failover = failover
6873
    self.fallback = fallback
6874
    self.ignore_consistency = ignore_consistency
6875
    self.shutdown_timeout = shutdown_timeout
6876

    
6877
  def CheckPrereq(self):
6878
    """Check prerequisites.
6879

6880
    This checks that the instance is in the cluster.
6881

6882
    """
6883
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6884
    instance = self.cfg.GetInstanceInfo(instance_name)
6885
    assert instance is not None
6886
    self.instance = instance
6887

    
6888
    if (not self.cleanup and not instance.admin_up and not self.failover and
6889
        self.fallback):
6890
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6891
                      " to failover")
6892
      self.failover = True
6893

    
6894
    if instance.disk_template not in constants.DTS_MIRRORED:
6895
      if self.failover:
6896
        text = "failovers"
6897
      else:
6898
        text = "migrations"
6899
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6900
                                 " %s" % (instance.disk_template, text),
6901
                                 errors.ECODE_STATE)
6902

    
6903
    if instance.disk_template in constants.DTS_EXT_MIRROR:
6904
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6905

    
6906
      if self.lu.op.iallocator:
6907
        self._RunAllocator()
6908
      else:
6909
        # We set set self.target_node as it is required by
6910
        # BuildHooksEnv
6911
        self.target_node = self.lu.op.target_node
6912

    
6913
      # self.target_node is already populated, either directly or by the
6914
      # iallocator run
6915
      target_node = self.target_node
6916
      if self.target_node == instance.primary_node:
6917
        raise errors.OpPrereqError("Cannot migrate instance %s"
6918
                                   " to its primary (%s)" %
6919
                                   (instance.name, instance.primary_node))
6920

    
6921
      if len(self.lu.tasklets) == 1:
6922
        # It is safe to release locks only when we're the only tasklet
6923
        # in the LU
6924
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6925
                      keep=[instance.primary_node, self.target_node])
6926

    
6927
    else:
6928
      secondary_nodes = instance.secondary_nodes
6929
      if not secondary_nodes:
6930
        raise errors.ConfigurationError("No secondary node but using"
6931
                                        " %s disk template" %
6932
                                        instance.disk_template)
6933
      target_node = secondary_nodes[0]
6934
      if self.lu.op.iallocator or (self.lu.op.target_node and
6935
                                   self.lu.op.target_node != target_node):
6936
        if self.failover:
6937
          text = "failed over"
6938
        else:
6939
          text = "migrated"
6940
        raise errors.OpPrereqError("Instances with disk template %s cannot"
6941
                                   " be %s to arbitrary nodes"
6942
                                   " (neither an iallocator nor a target"
6943
                                   " node can be passed)" %
6944
                                   (instance.disk_template, text),
6945
                                   errors.ECODE_INVAL)
6946

    
6947
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6948

    
6949
    # check memory requirements on the secondary node
6950
    if not self.failover or instance.admin_up:
6951
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6952
                           instance.name, i_be[constants.BE_MEMORY],
6953
                           instance.hypervisor)
6954
    else:
6955
      self.lu.LogInfo("Not checking memory on the secondary node as"
6956
                      " instance will not be started")
6957

    
6958
    # check bridge existance
6959
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6960

    
6961
    if not self.cleanup:
6962
      _CheckNodeNotDrained(self.lu, target_node)
6963
      if not self.failover:
6964
        result = self.rpc.call_instance_migratable(instance.primary_node,
6965
                                                   instance)
6966
        if result.fail_msg and self.fallback:
6967
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6968
                          " failover")
6969
          self.failover = True
6970
        else:
6971
          result.Raise("Can't migrate, please use failover",
6972
                       prereq=True, ecode=errors.ECODE_STATE)
6973

    
6974
    assert not (self.failover and self.cleanup)
6975

    
6976
    if not self.failover:
6977
      if self.lu.op.live is not None and self.lu.op.mode is not None:
6978
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6979
                                   " parameters are accepted",
6980
                                   errors.ECODE_INVAL)
6981
      if self.lu.op.live is not None:
6982
        if self.lu.op.live:
6983
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
6984
        else:
6985
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6986
        # reset the 'live' parameter to None so that repeated
6987
        # invocations of CheckPrereq do not raise an exception
6988
        self.lu.op.live = None
6989
      elif self.lu.op.mode is None:
6990
        # read the default value from the hypervisor
6991
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6992
                                                skip_globals=False)
6993
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6994

    
6995
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6996
    else:
6997
      # Failover is never live
6998
      self.live = False
6999

    
7000
  def _RunAllocator(self):
7001
    """Run the allocator based on input opcode.
7002

7003
    """
7004
    ial = IAllocator(self.cfg, self.rpc,
7005
                     mode=constants.IALLOCATOR_MODE_RELOC,
7006
                     name=self.instance_name,
7007
                     # TODO See why hail breaks with a single node below
7008
                     relocate_from=[self.instance.primary_node,
7009
                                    self.instance.primary_node],
7010
                     )
7011

    
7012
    ial.Run(self.lu.op.iallocator)
7013

    
7014
    if not ial.success:
7015
      raise errors.OpPrereqError("Can't compute nodes using"
7016
                                 " iallocator '%s': %s" %
7017
                                 (self.lu.op.iallocator, ial.info),
7018
                                 errors.ECODE_NORES)
7019
    if len(ial.result) != ial.required_nodes:
7020
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7021
                                 " of nodes (%s), required %s" %
7022
                                 (self.lu.op.iallocator, len(ial.result),
7023
                                  ial.required_nodes), errors.ECODE_FAULT)
7024
    self.target_node = ial.result[0]
7025
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7026
                 self.instance_name, self.lu.op.iallocator,
7027
                 utils.CommaJoin(ial.result))
7028

    
7029
  def _WaitUntilSync(self):
7030
    """Poll with custom rpc for disk sync.
7031

7032
    This uses our own step-based rpc call.
7033

7034
    """
7035
    self.feedback_fn("* wait until resync is done")
7036
    all_done = False
7037
    while not all_done:
7038
      all_done = True
7039
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7040
                                            self.nodes_ip,
7041
                                            self.instance.disks)
7042
      min_percent = 100
7043
      for node, nres in result.items():
7044
        nres.Raise("Cannot resync disks on node %s" % node)
7045
        node_done, node_percent = nres.payload
7046
        all_done = all_done and node_done
7047
        if node_percent is not None:
7048
          min_percent = min(min_percent, node_percent)
7049
      if not all_done:
7050
        if min_percent < 100:
7051
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7052
        time.sleep(2)
7053

    
7054
  def _EnsureSecondary(self, node):
7055
    """Demote a node to secondary.
7056

7057
    """
7058
    self.feedback_fn("* switching node %s to secondary mode" % node)
7059

    
7060
    for dev in self.instance.disks:
7061
      self.cfg.SetDiskID(dev, node)
7062

    
7063
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7064
                                          self.instance.disks)
7065
    result.Raise("Cannot change disk to secondary on node %s" % node)
7066

    
7067
  def _GoStandalone(self):
7068
    """Disconnect from the network.
7069

7070
    """
7071
    self.feedback_fn("* changing into standalone mode")
7072
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7073
                                               self.instance.disks)
7074
    for node, nres in result.items():
7075
      nres.Raise("Cannot disconnect disks node %s" % node)
7076

    
7077
  def _GoReconnect(self, multimaster):
7078
    """Reconnect to the network.
7079

7080
    """
7081
    if multimaster:
7082
      msg = "dual-master"
7083
    else:
7084
      msg = "single-master"
7085
    self.feedback_fn("* changing disks into %s mode" % msg)
7086
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7087
                                           self.instance.disks,
7088
                                           self.instance.name, multimaster)
7089
    for node, nres in result.items():
7090
      nres.Raise("Cannot change disks config on node %s" % node)
7091

    
7092
  def _ExecCleanup(self):
7093
    """Try to cleanup after a failed migration.
7094

7095
    The cleanup is done by:
7096
      - check that the instance is running only on one node
7097
        (and update the config if needed)
7098
      - change disks on its secondary node to secondary
7099
      - wait until disks are fully synchronized
7100
      - disconnect from the network
7101
      - change disks into single-master mode
7102
      - wait again until disks are fully synchronized
7103

7104
    """
7105
    instance = self.instance
7106
    target_node = self.target_node
7107
    source_node = self.source_node
7108

    
7109
    # check running on only one node
7110
    self.feedback_fn("* checking where the instance actually runs"
7111
                     " (if this hangs, the hypervisor might be in"
7112
                     " a bad state)")
7113
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7114
    for node, result in ins_l.items():
7115
      result.Raise("Can't contact node %s" % node)
7116

    
7117
    runningon_source = instance.name in ins_l[source_node].payload
7118
    runningon_target = instance.name in ins_l[target_node].payload
7119

    
7120
    if runningon_source and runningon_target:
7121
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7122
                               " or the hypervisor is confused; you will have"
7123
                               " to ensure manually that it runs only on one"
7124
                               " and restart this operation")
7125

    
7126
    if not (runningon_source or runningon_target):
7127
      raise errors.OpExecError("Instance does not seem to be running at all;"
7128
                               " in this case it's safer to repair by"
7129
                               " running 'gnt-instance stop' to ensure disk"
7130
                               " shutdown, and then restarting it")
7131

    
7132
    if runningon_target:
7133
      # the migration has actually succeeded, we need to update the config
7134
      self.feedback_fn("* instance running on secondary node (%s),"
7135
                       " updating config" % target_node)
7136
      instance.primary_node = target_node
7137
      self.cfg.Update(instance, self.feedback_fn)
7138
      demoted_node = source_node
7139
    else:
7140
      self.feedback_fn("* instance confirmed to be running on its"
7141
                       " primary node (%s)" % source_node)
7142
      demoted_node = target_node
7143

    
7144
    if instance.disk_template in constants.DTS_INT_MIRROR:
7145
      self._EnsureSecondary(demoted_node)
7146
      try:
7147
        self._WaitUntilSync()
7148
      except errors.OpExecError:
7149
        # we ignore here errors, since if the device is standalone, it
7150
        # won't be able to sync
7151
        pass
7152
      self._GoStandalone()
7153
      self._GoReconnect(False)
7154
      self._WaitUntilSync()
7155

    
7156
    self.feedback_fn("* done")
7157

    
7158
  def _RevertDiskStatus(self):
7159
    """Try to revert the disk status after a failed migration.
7160

7161
    """
7162
    target_node = self.target_node
7163
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7164
      return
7165

    
7166
    try:
7167
      self._EnsureSecondary(target_node)
7168
      self._GoStandalone()
7169
      self._GoReconnect(False)
7170
      self._WaitUntilSync()
7171
    except errors.OpExecError, err:
7172
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7173
                         " please try to recover the instance manually;"
7174
                         " error '%s'" % str(err))
7175

    
7176
  def _AbortMigration(self):
7177
    """Call the hypervisor code to abort a started migration.
7178

7179
    """
7180
    instance = self.instance
7181
    target_node = self.target_node
7182
    migration_info = self.migration_info
7183

    
7184
    abort_result = self.rpc.call_finalize_migration(target_node,
7185
                                                    instance,
7186
                                                    migration_info,
7187
                                                    False)
7188
    abort_msg = abort_result.fail_msg
7189
    if abort_msg:
7190
      logging.error("Aborting migration failed on target node %s: %s",
7191
                    target_node, abort_msg)
7192
      # Don't raise an exception here, as we still have to try to revert the
7193
      # disk status, even if this step failed.
7194

    
7195
  def _ExecMigration(self):
7196
    """Migrate an instance.
7197

7198
    The migrate is done by:
7199
      - change the disks into dual-master mode
7200
      - wait until disks are fully synchronized again
7201
      - migrate the instance
7202
      - change disks on the new secondary node (the old primary) to secondary
7203
      - wait until disks are fully synchronized
7204
      - change disks into single-master mode
7205

7206
    """
7207
    instance = self.instance
7208
    target_node = self.target_node
7209
    source_node = self.source_node
7210

    
7211
    self.feedback_fn("* checking disk consistency between source and target")
7212
    for dev in instance.disks:
7213
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7214
        raise errors.OpExecError("Disk %s is degraded or not fully"
7215
                                 " synchronized on target node,"
7216
                                 " aborting migration" % dev.iv_name)
7217

    
7218
    # First get the migration information from the remote node
7219
    result = self.rpc.call_migration_info(source_node, instance)
7220
    msg = result.fail_msg
7221
    if msg:
7222
      log_err = ("Failed fetching source migration information from %s: %s" %
7223
                 (source_node, msg))
7224
      logging.error(log_err)
7225
      raise errors.OpExecError(log_err)
7226

    
7227
    self.migration_info = migration_info = result.payload
7228

    
7229
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7230
      # Then switch the disks to master/master mode
7231
      self._EnsureSecondary(target_node)
7232
      self._GoStandalone()
7233
      self._GoReconnect(True)
7234
      self._WaitUntilSync()
7235

    
7236
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7237
    result = self.rpc.call_accept_instance(target_node,
7238
                                           instance,
7239
                                           migration_info,
7240
                                           self.nodes_ip[target_node])
7241

    
7242
    msg = result.fail_msg
7243
    if msg:
7244
      logging.error("Instance pre-migration failed, trying to revert"
7245
                    " disk status: %s", msg)
7246
      self.feedback_fn("Pre-migration failed, aborting")
7247
      self._AbortMigration()
7248
      self._RevertDiskStatus()
7249
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7250
                               (instance.name, msg))
7251

    
7252
    self.feedback_fn("* migrating instance to %s" % target_node)
7253
    result = self.rpc.call_instance_migrate(source_node, instance,
7254
                                            self.nodes_ip[target_node],
7255
                                            self.live)
7256
    msg = result.fail_msg
7257
    if msg:
7258
      logging.error("Instance migration failed, trying to revert"
7259
                    " disk status: %s", msg)
7260
      self.feedback_fn("Migration failed, aborting")
7261
      self._AbortMigration()
7262
      self._RevertDiskStatus()
7263
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7264
                               (instance.name, msg))
7265

    
7266
    instance.primary_node = target_node
7267
    # distribute new instance config to the other nodes
7268
    self.cfg.Update(instance, self.feedback_fn)
7269

    
7270
    result = self.rpc.call_finalize_migration(target_node,
7271
                                              instance,
7272
                                              migration_info,
7273
                                              True)
7274
    msg = result.fail_msg
7275
    if msg:
7276
      logging.error("Instance migration succeeded, but finalization failed:"
7277
                    " %s", msg)
7278
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7279
                               msg)
7280

    
7281
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7282
      self._EnsureSecondary(source_node)
7283
      self._WaitUntilSync()
7284
      self._GoStandalone()
7285
      self._GoReconnect(False)
7286
      self._WaitUntilSync()
7287

    
7288
    self.feedback_fn("* done")
7289

    
7290
  def _ExecFailover(self):
7291
    """Failover an instance.
7292

7293
    The failover is done by shutting it down on its present node and
7294
    starting it on the secondary.
7295

7296
    """
7297
    instance = self.instance
7298
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7299

    
7300
    source_node = instance.primary_node
7301
    target_node = self.target_node
7302

    
7303
    if instance.admin_up:
7304
      self.feedback_fn("* checking disk consistency between source and target")
7305
      for dev in instance.disks:
7306
        # for drbd, these are drbd over lvm
7307
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7308
          if primary_node.offline:
7309
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7310
                             " target node %s" %
7311
                             (primary_node.name, dev.iv_name, target_node))
7312
          elif not self.ignore_consistency:
7313
            raise errors.OpExecError("Disk %s is degraded on target node,"
7314
                                     " aborting failover" % dev.iv_name)
7315
    else:
7316
      self.feedback_fn("* not checking disk consistency as instance is not"
7317
                       " running")
7318

    
7319
    self.feedback_fn("* shutting down instance on source node")
7320
    logging.info("Shutting down instance %s on node %s",
7321
                 instance.name, source_node)
7322

    
7323
    result = self.rpc.call_instance_shutdown(source_node, instance,
7324
                                             self.shutdown_timeout)
7325
    msg = result.fail_msg
7326
    if msg:
7327
      if self.ignore_consistency or primary_node.offline:
7328
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7329
                           " proceeding anyway; please make sure node"
7330
                           " %s is down; error details: %s",
7331
                           instance.name, source_node, source_node, msg)
7332
      else:
7333
        raise errors.OpExecError("Could not shutdown instance %s on"
7334
                                 " node %s: %s" %
7335
                                 (instance.name, source_node, msg))
7336

    
7337
    self.feedback_fn("* deactivating the instance's disks on source node")
7338
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7339
      raise errors.OpExecError("Can't shut down the instance's disks")
7340

    
7341
    instance.primary_node = target_node
7342
    # distribute new instance config to the other nodes
7343
    self.cfg.Update(instance, self.feedback_fn)
7344

    
7345
    # Only start the instance if it's marked as up
7346
    if instance.admin_up:
7347
      self.feedback_fn("* activating the instance's disks on target node %s" %
7348
                       target_node)
7349
      logging.info("Starting instance %s on node %s",
7350
                   instance.name, target_node)
7351

    
7352
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7353
                                           ignore_secondaries=True)
7354
      if not disks_ok:
7355
        _ShutdownInstanceDisks(self.lu, instance)
7356
        raise errors.OpExecError("Can't activate the instance's disks")
7357

    
7358
      self.feedback_fn("* starting the instance on the target node %s" %
7359
                       target_node)
7360
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7361
                                            False)
7362
      msg = result.fail_msg
7363
      if msg:
7364
        _ShutdownInstanceDisks(self.lu, instance)
7365
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7366
                                 (instance.name, target_node, msg))
7367

    
7368
  def Exec(self, feedback_fn):
7369
    """Perform the migration.
7370

7371
    """
7372
    self.feedback_fn = feedback_fn
7373
    self.source_node = self.instance.primary_node
7374

    
7375
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7376
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7377
      self.target_node = self.instance.secondary_nodes[0]
7378
      # Otherwise self.target_node has been populated either
7379
      # directly, or through an iallocator.
7380

    
7381
    self.all_nodes = [self.source_node, self.target_node]
7382
    self.nodes_ip = {
7383
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7384
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7385
      }
7386

    
7387
    if self.failover:
7388
      feedback_fn("Failover instance %s" % self.instance.name)
7389
      self._ExecFailover()
7390
    else:
7391
      feedback_fn("Migrating instance %s" % self.instance.name)
7392

    
7393
      if self.cleanup:
7394
        return self._ExecCleanup()
7395
      else:
7396
        return self._ExecMigration()
7397

    
7398

    
7399
def _CreateBlockDev(lu, node, instance, device, force_create,
7400
                    info, force_open):
7401
  """Create a tree of block devices on a given node.
7402

7403
  If this device type has to be created on secondaries, create it and
7404
  all its children.
7405

7406
  If not, just recurse to children keeping the same 'force' value.
7407

7408
  @param lu: the lu on whose behalf we execute
7409
  @param node: the node on which to create the device
7410
  @type instance: L{objects.Instance}
7411
  @param instance: the instance which owns the device
7412
  @type device: L{objects.Disk}
7413
  @param device: the device to create
7414
  @type force_create: boolean
7415
  @param force_create: whether to force creation of this device; this
7416
      will be changed to True whenever we find a device which has
7417
      CreateOnSecondary() attribute
7418
  @param info: the extra 'metadata' we should attach to the device
7419
      (this will be represented as a LVM tag)
7420
  @type force_open: boolean
7421
  @param force_open: this parameter will be passed to the
7422
      L{backend.BlockdevCreate} function where it specifies
7423
      whether we run on primary or not, and it affects both
7424
      the child assembly and the device's own Open() execution
7425

7426
  """
7427
  if device.CreateOnSecondary():
7428
    force_create = True
7429

    
7430
  if device.children:
7431
    for child in device.children:
7432
      _CreateBlockDev(lu, node, instance, child, force_create,
7433
                      info, force_open)
7434

    
7435
  if not force_create:
7436
    return
7437

    
7438
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7439

    
7440

    
7441
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7442
  """Create a single block device on a given node.
7443

7444
  This will not recurse over children of the device, so they must be
7445
  created in advance.
7446

7447
  @param lu: the lu on whose behalf we execute
7448
  @param node: the node on which to create the device
7449
  @type instance: L{objects.Instance}
7450
  @param instance: the instance which owns the device
7451
  @type device: L{objects.Disk}
7452
  @param device: the device to create
7453
  @param info: the extra 'metadata' we should attach to the device
7454
      (this will be represented as a LVM tag)
7455
  @type force_open: boolean
7456
  @param force_open: this parameter will be passed to the
7457
      L{backend.BlockdevCreate} function where it specifies
7458
      whether we run on primary or not, and it affects both
7459
      the child assembly and the device's own Open() execution
7460

7461
  """
7462
  lu.cfg.SetDiskID(device, node)
7463
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7464
                                       instance.name, force_open, info)
7465
  result.Raise("Can't create block device %s on"
7466
               " node %s for instance %s" % (device, node, instance.name))
7467
  if device.physical_id is None:
7468
    device.physical_id = result.payload
7469

    
7470

    
7471
def _GenerateUniqueNames(lu, exts):
7472
  """Generate a suitable LV name.
7473

7474
  This will generate logical volume names for the given instance.
7475

7476
  """
7477
  results = []
7478
  for val in exts:
7479
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7480
    results.append("%s%s" % (new_id, val))
7481
  return results
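  # Illustrative example (the placeholder prefixes are made up): requesting the
  # extensions [".disk0", ".disk1"] yields one cluster-wide unique name per
  # extension, e.g. ["<uuid1>.disk0", "<uuid2>.disk1"], which the callers then
  # use as LV names.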
7482

    
7483

    
7484
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7485
                         iv_name, p_minor, s_minor):
7486
  """Generate a drbd8 device complete with its children.
7487

7488
  """
7489
  assert len(vgnames) == len(names) == 2
7490
  port = lu.cfg.AllocatePort()
7491
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7492
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7493
                          logical_id=(vgnames[0], names[0]))
7494
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7495
                          logical_id=(vgnames[1], names[1]))
7496
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7497
                          logical_id=(primary, secondary, port,
7498
                                      p_minor, s_minor,
7499
                                      shared_secret),
7500
                          children=[dev_data, dev_meta],
7501
                          iv_name=iv_name)
7502
  return drbd_dev
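  # The returned object is an LD_DRBD8 disk whose logical_id carries
  # (primary, secondary, port, p_minor, s_minor, shared_secret) and whose two
  # LD_LV children hold the data volume (full size) and the 128 MiB DRBD
  # metadata volume.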
7503

    
7504

    
7505
def _GenerateDiskTemplate(lu, template_name,
7506
                          instance_name, primary_node,
7507
                          secondary_nodes, disk_info,
7508
                          file_storage_dir, file_driver,
7509
                          base_index, feedback_fn):
7510
  """Generate the entire disk layout for a given template type.
7511

7512
  """
7513
  # TODO: compute space requirements
7514

    
7515
  vgname = lu.cfg.GetVGName()
7516
  disk_count = len(disk_info)
7517
  disks = []
7518
  if template_name == constants.DT_DISKLESS:
7519
    pass
7520
  elif template_name == constants.DT_PLAIN:
7521
    if len(secondary_nodes) != 0:
7522
      raise errors.ProgrammerError("Wrong template configuration")
7523

    
7524
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7525
                                      for i in range(disk_count)])
7526
    for idx, disk in enumerate(disk_info):
7527
      disk_index = idx + base_index
7528
      vg = disk.get(constants.IDISK_VG, vgname)
7529
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7530
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7531
                              size=disk[constants.IDISK_SIZE],
7532
                              logical_id=(vg, names[idx]),
7533
                              iv_name="disk/%d" % disk_index,
7534
                              mode=disk[constants.IDISK_MODE])
7535
      disks.append(disk_dev)
7536
  elif template_name == constants.DT_DRBD8:
7537
    if len(secondary_nodes) != 1:
7538
      raise errors.ProgrammerError("Wrong template configuration")
7539
    remote_node = secondary_nodes[0]
7540
    minors = lu.cfg.AllocateDRBDMinor(
7541
      [primary_node, remote_node] * len(disk_info), instance_name)
7542

    
7543
    names = []
7544
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7545
                                               for i in range(disk_count)]):
7546
      names.append(lv_prefix + "_data")
7547
      names.append(lv_prefix + "_meta")
7548
    for idx, disk in enumerate(disk_info):
7549
      disk_index = idx + base_index
7550
      data_vg = disk.get(constants.IDISK_VG, vgname)
7551
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7552
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7553
                                      disk[constants.IDISK_SIZE],
7554
                                      [data_vg, meta_vg],
7555
                                      names[idx * 2:idx * 2 + 2],
7556
                                      "disk/%d" % disk_index,
7557
                                      minors[idx * 2], minors[idx * 2 + 1])
7558
      disk_dev.mode = disk[constants.IDISK_MODE]
7559
      disks.append(disk_dev)
7560
  elif template_name == constants.DT_FILE:
7561
    if len(secondary_nodes) != 0:
7562
      raise errors.ProgrammerError("Wrong template configuration")
7563

    
7564
    opcodes.RequireFileStorage()
7565

    
7566
    for idx, disk in enumerate(disk_info):
7567
      disk_index = idx + base_index
7568
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7569
                              size=disk[constants.IDISK_SIZE],
7570
                              iv_name="disk/%d" % disk_index,
7571
                              logical_id=(file_driver,
7572
                                          "%s/disk%d" % (file_storage_dir,
7573
                                                         disk_index)),
7574
                              mode=disk[constants.IDISK_MODE])
7575
      disks.append(disk_dev)
7576
  elif template_name == constants.DT_SHARED_FILE:
7577
    if len(secondary_nodes) != 0:
7578
      raise errors.ProgrammerError("Wrong template configuration")
7579

    
7580
    opcodes.RequireSharedFileStorage()
7581

    
7582
    for idx, disk in enumerate(disk_info):
7583
      disk_index = idx + base_index
7584
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7585
                              size=disk[constants.IDISK_SIZE],
7586
                              iv_name="disk/%d" % disk_index,
7587
                              logical_id=(file_driver,
7588
                                          "%s/disk%d" % (file_storage_dir,
7589
                                                         disk_index)),
7590
                              mode=disk[constants.IDISK_MODE])
7591
      disks.append(disk_dev)
7592
  elif template_name == constants.DT_BLOCK:
7593
    if len(secondary_nodes) != 0:
7594
      raise errors.ProgrammerError("Wrong template configuration")
7595

    
7596
    for idx, disk in enumerate(disk_info):
7597
      disk_index = idx + base_index
7598
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7599
                              size=disk[constants.IDISK_SIZE],
7600
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7601
                                          disk[constants.IDISK_ADOPT]),
7602
                              iv_name="disk/%d" % disk_index,
7603
                              mode=disk[constants.IDISK_MODE])
7604
      disks.append(disk_dev)
7605

    
7606
  else:
7607
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7608
  return disks
7609

    
7610

    
7611
def _GetInstanceInfoText(instance):
7612
  """Compute that text that should be added to the disk's metadata.
7613

7614
  """
7615
  return "originstname+%s" % instance.name
7616

    
7617

    
7618
def _CalcEta(time_taken, written, total_size):
7619
  """Calculates the ETA based on size written and total size.
7620

7621
  @param time_taken: The time taken so far
7622
  @param written: amount written so far
7623
  @param total_size: The total size of data to be written
7624
  @return: The remaining time in seconds
7625

7626
  """
7627
  avg_time = time_taken / float(written)
7628
  return (total_size - written) * avg_time
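  # Illustrative sanity check of the formula above (not part of the original
  # module): after writing 256 MiB of a 1024 MiB disk in 8 seconds,
  # avg_time = 8 / 256.0 = 0.03125 s/MiB, so the remaining time is
  # (1024 - 256) * 0.03125 = 24.0 seconds, i.e. _CalcEta(8.0, 256, 1024) == 24.0.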
7629

    
7630

    
7631
def _WipeDisks(lu, instance):
7632
  """Wipes instance disks.
7633

7634
  @type lu: L{LogicalUnit}
7635
  @param lu: the logical unit on whose behalf we execute
7636
  @type instance: L{objects.Instance}
7637
  @param instance: the instance whose disks we should create
7638
  @return: the success of the wipe
7639

7640
  """
7641
  node = instance.primary_node
7642

    
7643
  for device in instance.disks:
7644
    lu.cfg.SetDiskID(device, node)
7645

    
7646
  logging.info("Pause sync of instance %s disks", instance.name)
7647
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7648

    
7649
  for idx, success in enumerate(result.payload):
7650
    if not success:
7651
      logging.warn("pause-sync of instance %s for disks %d failed",
7652
                   instance.name, idx)
7653

    
7654
  try:
7655
    for idx, device in enumerate(instance.disks):
7656
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7657
      # at most MAX_WIPE_CHUNK
7658
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7659
                            constants.MIN_WIPE_CHUNK_PERCENT)
7660
      # we _must_ make this an int, otherwise rounding errors will
7661
      # occur
7662
      wipe_chunk_size = int(wipe_chunk_size)
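      # Illustrative figures only (the real values live in constants.py and may
      # differ): with MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 10240,
      # a 51200 MiB disk would be wiped in
      # min(10240, 51200 / 100.0 * 10) = 5120 MiB chunks.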
7663

    
7664
      lu.LogInfo("* Wiping disk %d", idx)
7665
      logging.info("Wiping disk %d for instance %s, node %s using"
7666
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7667

    
7668
      offset = 0
7669
      size = device.size
7670
      last_output = 0
7671
      start_time = time.time()
7672

    
7673
      while offset < size:
7674
        wipe_size = min(wipe_chunk_size, size - offset)
7675
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7676
                      idx, offset, wipe_size)
7677
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7678
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7679
                     (idx, offset, wipe_size))
7680
        now = time.time()
7681
        offset += wipe_size
7682
        if now - last_output >= 60:
7683
          eta = _CalcEta(now - start_time, offset, size)
7684
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7685
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7686
          last_output = now
7687
  finally:
7688
    logging.info("Resume sync of instance %s disks", instance.name)
7689

    
7690
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7691

    
7692
    for idx, success in enumerate(result.payload):
7693
      if not success:
7694
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7695
                      " look at the status and troubleshoot the issue", idx)
7696
        logging.warn("resume-sync of instance %s for disks %d failed",
7697
                     instance.name, idx)
7698

    
7699

    
7700
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7701
  """Create all disks for an instance.
7702

7703
  This abstracts away some work from AddInstance.
7704

7705
  @type lu: L{LogicalUnit}
7706
  @param lu: the logical unit on whose behalf we execute
7707
  @type instance: L{objects.Instance}
7708
  @param instance: the instance whose disks we should create
7709
  @type to_skip: list
7710
  @param to_skip: list of indices to skip
7711
  @type target_node: string
7712
  @param target_node: if passed, overrides the target node for creation
7713
  @rtype: boolean
7714
  @return: the success of the creation
7715

7716
  """
7717
  info = _GetInstanceInfoText(instance)
7718
  if target_node is None:
7719
    pnode = instance.primary_node
7720
    all_nodes = instance.all_nodes
7721
  else:
7722
    pnode = target_node
7723
    all_nodes = [pnode]
7724

    
7725
  if instance.disk_template in constants.DTS_FILEBASED:
7726
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7727
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7728

    
7729
    result.Raise("Failed to create directory '%s' on"
7730
                 " node %s" % (file_storage_dir, pnode))
7731

    
7732
  # Note: this needs to be kept in sync with adding of disks in
7733
  # LUInstanceSetParams
7734
  for idx, device in enumerate(instance.disks):
7735
    if to_skip and idx in to_skip:
7736
      continue
7737
    logging.info("Creating volume %s for instance %s",
7738
                 device.iv_name, instance.name)
7739
    #HARDCODE
7740
    for node in all_nodes:
7741
      f_create = node == pnode
7742
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7743

    
7744

    
7745
def _RemoveDisks(lu, instance, target_node=None):
7746
  """Remove all disks for an instance.
7747

7748
  This abstracts away some work from `AddInstance()` and
7749
  `RemoveInstance()`. Note that in case some of the devices couldn't
7750
  be removed, the removal will continue with the other ones (compare
7751
  with `_CreateDisks()`).
7752

7753
  @type lu: L{LogicalUnit}
7754
  @param lu: the logical unit on whose behalf we execute
7755
  @type instance: L{objects.Instance}
7756
  @param instance: the instance whose disks we should remove
7757
  @type target_node: string
7758
  @param target_node: used to override the node on which to remove the disks
7759
  @rtype: boolean
7760
  @return: the success of the removal
7761

7762
  """
7763
  logging.info("Removing block devices for instance %s", instance.name)
7764

    
7765
  all_result = True
7766
  for device in instance.disks:
7767
    if target_node:
7768
      edata = [(target_node, device)]
7769
    else:
7770
      edata = device.ComputeNodeTree(instance.primary_node)
7771
    for node, disk in edata:
7772
      lu.cfg.SetDiskID(disk, node)
7773
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7774
      if msg:
7775
        lu.LogWarning("Could not remove block device %s on node %s,"
7776
                      " continuing anyway: %s", device.iv_name, node, msg)
7777
        all_result = False
7778

    
7779
  if instance.disk_template == constants.DT_FILE:
7780
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7781
    if target_node:
7782
      tgt = target_node
7783
    else:
7784
      tgt = instance.primary_node
7785
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7786
    if result.fail_msg:
7787
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7788
                    file_storage_dir, instance.primary_node, result.fail_msg)
7789
      all_result = False
7790

    
7791
  return all_result
7792

    
7793

    
7794
def _ComputeDiskSizePerVG(disk_template, disks):
7795
  """Compute disk size requirements in the volume group
7796

7797
  """
7798
  def _compute(disks, payload):
7799
    """Universal algorithm.
7800

7801
    """
7802
    vgs = {}
7803
    for disk in disks:
7804
      vgs[disk[constants.IDISK_VG]] = \
7805
        vgs.get(disk[constants.IDISK_VG], 0) + \
          disk[constants.IDISK_SIZE] + payload
7806

    
7807
    return vgs
7808

    
7809
  # Required free disk space as a function of disk and swap space
7810
  req_size_dict = {
7811
    constants.DT_DISKLESS: {},
7812
    constants.DT_PLAIN: _compute(disks, 0),
7813
    # 128 MB are added for drbd metadata for each disk
7814
    constants.DT_DRBD8: _compute(disks, 128),
7815
    constants.DT_FILE: {},
7816
    constants.DT_SHARED_FILE: {},
7817
  }
7818

    
7819
  if disk_template not in req_size_dict:
7820
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7821
                                 " is unknown" %  disk_template)
7822

    
7823
  return req_size_dict[disk_template]
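  # Worked example (illustrative only, VG names are made up): two DRBD8 disks
  # of 1024 MiB in "xenvg" and 2048 MiB in "fastvg" each get 128 MiB of
  # metadata added, so the result maps every volume group to its total
  # requirement: {"xenvg": 1024 + 128, "fastvg": 2048 + 128}, i.e.
  # {"xenvg": 1152, "fastvg": 2176}.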
7824

    
7825

    
7826
def _ComputeDiskSize(disk_template, disks):
7827
  """Compute disk size requirements in the volume group
7828

7829
  """
7830
  # Required free disk space as a function of disk and swap space
7831
  req_size_dict = {
7832
    constants.DT_DISKLESS: None,
7833
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7834
    # 128 MB are added for drbd metadata for each disk
7835
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7836
    constants.DT_FILE: None,
7837
    constants.DT_SHARED_FILE: 0,
7838
    constants.DT_BLOCK: 0,
7839
  }
7840

    
7841
  if disk_template not in req_size_dict:
7842
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7843
                                 " is unknown" %  disk_template)
7844

    
7845
  return req_size_dict[disk_template]
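  # Worked example (illustrative only): for two disks of 1024 and 2048 MiB,
  # DT_PLAIN needs 1024 + 2048 = 3072 MiB, while DT_DRBD8 adds 128 MiB of
  # metadata per disk: (1024 + 128) + (2048 + 128) = 3328 MiB, e.g.
  #   _ComputeDiskSize(constants.DT_PLAIN, [{constants.IDISK_SIZE: 1024},
  #                                         {constants.IDISK_SIZE: 2048}]) == 3072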
7846

    
7847

    
7848
def _FilterVmNodes(lu, nodenames):
7849
  """Filters out non-vm_capable nodes from a list.
7850

7851
  @type lu: L{LogicalUnit}
7852
  @param lu: the logical unit for which we check
7853
  @type nodenames: list
7854
  @param nodenames: the list of nodes on which we should check
7855
  @rtype: list
7856
  @return: the list of vm-capable nodes
7857

7858
  """
7859
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7860
  return [name for name in nodenames if name not in vm_nodes]
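  # Note: the frozenset above intentionally holds the *non*-vm-capable nodes;
  # the list comprehension keeps every requested name that is not in that set.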
7861

    
7862

    
7863
def _CheckHVParams(lu, nodenames, hvname, hvparams):
7864
  """Hypervisor parameter validation.
7865

7866
  This function abstracts the hypervisor parameter validation to be
7867
  used in both instance create and instance modify.
7868

7869
  @type lu: L{LogicalUnit}
7870
  @param lu: the logical unit for which we check
7871
  @type nodenames: list
7872
  @param nodenames: the list of nodes on which we should check
7873
  @type hvname: string
7874
  @param hvname: the name of the hypervisor we should use
7875
  @type hvparams: dict
7876
  @param hvparams: the parameters which we need to check
7877
  @raise errors.OpPrereqError: if the parameters are not valid
7878

7879
  """
7880
  nodenames = _FilterVmNodes(lu, nodenames)
7881
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7882
                                                  hvname,
7883
                                                  hvparams)
7884
  for node in nodenames:
7885
    info = hvinfo[node]
7886
    if info.offline:
7887
      continue
7888
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
7889

    
7890

    
7891
def _CheckOSParams(lu, required, nodenames, osname, osparams):
7892
  """OS parameters validation.
7893

7894
  @type lu: L{LogicalUnit}
7895
  @param lu: the logical unit for which we check
7896
  @type required: boolean
7897
  @param required: whether the validation should fail if the OS is not
7898
      found
7899
  @type nodenames: list
7900
  @param nodenames: the list of nodes on which we should check
7901
  @type osname: string
7902
  @param osname: the name of the OS we should use
7903
  @type osparams: dict
7904
  @param osparams: the parameters which we need to check
7905
  @raise errors.OpPrereqError: if the parameters are not valid
7906

7907
  """
7908
  nodenames = _FilterVmNodes(lu, nodenames)
7909
  result = lu.rpc.call_os_validate(required, nodenames, osname,
7910
                                   [constants.OS_VALIDATE_PARAMETERS],
7911
                                   osparams)
7912
  for node, nres in result.items():
7913
    # we don't check for offline cases since this should be run only
7914
    # against the master node and/or an instance's nodes
7915
    nres.Raise("OS Parameters validation failed on node %s" % node)
7916
    if not nres.payload:
7917
      lu.LogInfo("OS %s not found on node %s, validation skipped",
7918
                 osname, node)
7919

    
7920

    
7921
class LUInstanceCreate(LogicalUnit):
7922
  """Create an instance.
7923

7924
  """
7925
  HPATH = "instance-add"
7926
  HTYPE = constants.HTYPE_INSTANCE
7927
  REQ_BGL = False
7928

    
7929
  def CheckArguments(self):
7930
    """Check arguments.
7931

7932
    """
7933
    # do not require name_check to ease forward/backward compatibility
7934
    # for tools
7935
    if self.op.no_install and self.op.start:
7936
      self.LogInfo("No-installation mode selected, disabling startup")
7937
      self.op.start = False
7938
    # validate/normalize the instance name
7939
    self.op.instance_name = \
7940
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7941

    
7942
    if self.op.ip_check and not self.op.name_check:
7943
      # TODO: make the ip check more flexible and not depend on the name check
7944
      raise errors.OpPrereqError("Cannot do IP address check without a name"
7945
                                 " check", errors.ECODE_INVAL)
7946

    
7947
    # check nics' parameter names
7948
    for nic in self.op.nics:
7949
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7950

    
7951
    # check disks: parameter names and consistent adopt/no-adopt strategy
7952
    has_adopt = has_no_adopt = False
7953
    for disk in self.op.disks:
7954
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7955
      if constants.IDISK_ADOPT in disk:
7956
        has_adopt = True
7957
      else:
7958
        has_no_adopt = True
7959
    if has_adopt and has_no_adopt:
7960
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7961
                                 errors.ECODE_INVAL)
7962
    if has_adopt:
7963
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7964
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7965
                                   " '%s' disk template" %
7966
                                   self.op.disk_template,
7967
                                   errors.ECODE_INVAL)
7968
      if self.op.iallocator is not None:
7969
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7970
                                   " iallocator script", errors.ECODE_INVAL)
7971
      if self.op.mode == constants.INSTANCE_IMPORT:
7972
        raise errors.OpPrereqError("Disk adoption not allowed for"
7973
                                   " instance import", errors.ECODE_INVAL)
7974
    else:
7975
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
7976
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7977
                                   " but no 'adopt' parameter given" %
7978
                                   self.op.disk_template,
7979
                                   errors.ECODE_INVAL)
7980

    
7981
    self.adopt_disks = has_adopt
7982

    
7983
    # instance name verification
7984
    if self.op.name_check:
7985
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7986
      self.op.instance_name = self.hostname1.name
7987
      # used in CheckPrereq for ip ping check
7988
      self.check_ip = self.hostname1.ip
7989
    else:
7990
      self.check_ip = None
7991

    
7992
    # file storage checks
7993
    if (self.op.file_driver and
7994
        not self.op.file_driver in constants.FILE_DRIVER):
7995
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
7996
                                 self.op.file_driver, errors.ECODE_INVAL)
7997

    
7998
    if self.op.disk_template == constants.DT_FILE:
7999
      opcodes.RequireFileStorage()
8000
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8001
      opcodes.RequireSharedFileStorage()
8002

    
8003
    ### Node/iallocator related checks
8004
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8005

    
8006
    if self.op.pnode is not None:
8007
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8008
        if self.op.snode is None:
8009
          raise errors.OpPrereqError("The networked disk templates need"
8010
                                     " a mirror node", errors.ECODE_INVAL)
8011
      elif self.op.snode:
8012
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8013
                        " template")
8014
        self.op.snode = None
8015

    
8016
    self._cds = _GetClusterDomainSecret()
8017

    
8018
    if self.op.mode == constants.INSTANCE_IMPORT:
8019
      # On import force_variant must be True, because if we forced it at
8020
      # initial install, our only chance when importing it back is that it
8021
      # works again!
8022
      self.op.force_variant = True
8023

    
8024
      if self.op.no_install:
8025
        self.LogInfo("No-installation mode has no effect during import")
8026

    
8027
    elif self.op.mode == constants.INSTANCE_CREATE:
8028
      if self.op.os_type is None:
8029
        raise errors.OpPrereqError("No guest OS specified",
8030
                                   errors.ECODE_INVAL)
8031
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8032
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8033
                                   " installation" % self.op.os_type,
8034
                                   errors.ECODE_STATE)
8035
      if self.op.disk_template is None:
8036
        raise errors.OpPrereqError("No disk template specified",
8037
                                   errors.ECODE_INVAL)
8038

    
8039
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8040
      # Check handshake to ensure both clusters have the same domain secret
8041
      src_handshake = self.op.source_handshake
8042
      if not src_handshake:
8043
        raise errors.OpPrereqError("Missing source handshake",
8044
                                   errors.ECODE_INVAL)
8045

    
8046
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8047
                                                           src_handshake)
8048
      if errmsg:
8049
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8050
                                   errors.ECODE_INVAL)
8051

    
8052
      # Load and check source CA
8053
      self.source_x509_ca_pem = self.op.source_x509_ca
8054
      if not self.source_x509_ca_pem:
8055
        raise errors.OpPrereqError("Missing source X509 CA",
8056
                                   errors.ECODE_INVAL)
8057

    
8058
      try:
8059
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8060
                                                    self._cds)
8061
      except OpenSSL.crypto.Error, err:
8062
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8063
                                   (err, ), errors.ECODE_INVAL)
8064

    
8065
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8066
      if errcode is not None:
8067
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8068
                                   errors.ECODE_INVAL)
8069

    
8070
      self.source_x509_ca = cert
8071

    
8072
      src_instance_name = self.op.source_instance_name
8073
      if not src_instance_name:
8074
        raise errors.OpPrereqError("Missing source instance name",
8075
                                   errors.ECODE_INVAL)
8076

    
8077
      self.source_instance_name = \
8078
          netutils.GetHostname(name=src_instance_name).name
8079

    
8080
    else:
8081
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8082
                                 self.op.mode, errors.ECODE_INVAL)
8083

    
8084
  def ExpandNames(self):
8085
    """ExpandNames for CreateInstance.
8086

8087
    Figure out the right locks for instance creation.
8088

8089
    """
8090
    self.needed_locks = {}
8091

    
8092
    instance_name = self.op.instance_name
8093
    # this is just a preventive check, but someone might still add this
8094
    # instance in the meantime, and creation will fail at lock-add time
8095
    if instance_name in self.cfg.GetInstanceList():
8096
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8097
                                 instance_name, errors.ECODE_EXISTS)
8098

    
8099
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8100

    
8101
    if self.op.iallocator:
8102
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8103
    else:
8104
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8105
      nodelist = [self.op.pnode]
8106
      if self.op.snode is not None:
8107
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8108
        nodelist.append(self.op.snode)
8109
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8110

    
8111
    # in case of import lock the source node too
8112
    if self.op.mode == constants.INSTANCE_IMPORT:
8113
      src_node = self.op.src_node
8114
      src_path = self.op.src_path
8115

    
8116
      if src_path is None:
8117
        self.op.src_path = src_path = self.op.instance_name
8118

    
8119
      if src_node is None:
8120
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8121
        self.op.src_node = None
8122
        if os.path.isabs(src_path):
8123
          raise errors.OpPrereqError("Importing an instance from an absolute"
8124
                                     " path requires a source node option",
8125
                                     errors.ECODE_INVAL)
8126
      else:
8127
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8128
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8129
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8130
        if not os.path.isabs(src_path):
8131
          self.op.src_path = src_path = \
8132
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8133

    
8134
  def _RunAllocator(self):
8135
    """Run the allocator based on input opcode.
8136

8137
    """
8138
    nics = [n.ToDict() for n in self.nics]
8139
    ial = IAllocator(self.cfg, self.rpc,
8140
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8141
                     name=self.op.instance_name,
8142
                     disk_template=self.op.disk_template,
8143
                     tags=self.op.tags,
8144
                     os=self.op.os_type,
8145
                     vcpus=self.be_full[constants.BE_VCPUS],
8146
                     memory=self.be_full[constants.BE_MEMORY],
8147
                     disks=self.disks,
8148
                     nics=nics,
8149
                     hypervisor=self.op.hypervisor,
8150
                     )
8151

    
8152
    ial.Run(self.op.iallocator)
8153

    
8154
    if not ial.success:
8155
      raise errors.OpPrereqError("Can't compute nodes using"
8156
                                 " iallocator '%s': %s" %
8157
                                 (self.op.iallocator, ial.info),
8158
                                 errors.ECODE_NORES)
8159
    if len(ial.result) != ial.required_nodes:
8160
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8161
                                 " of nodes (%s), required %s" %
8162
                                 (self.op.iallocator, len(ial.result),
8163
                                  ial.required_nodes), errors.ECODE_FAULT)
8164
    self.op.pnode = ial.result[0]
8165
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8166
                 self.op.instance_name, self.op.iallocator,
8167
                 utils.CommaJoin(ial.result))
8168
    if ial.required_nodes == 2:
8169
      self.op.snode = ial.result[1]
8170

    
8171
  def BuildHooksEnv(self):
8172
    """Build hooks env.
8173

8174
    This runs on master, primary and secondary nodes of the instance.
8175

8176
    """
8177
    env = {
8178
      "ADD_MODE": self.op.mode,
8179
      }
8180
    if self.op.mode == constants.INSTANCE_IMPORT:
8181
      env["SRC_NODE"] = self.op.src_node
8182
      env["SRC_PATH"] = self.op.src_path
8183
      env["SRC_IMAGES"] = self.src_images
8184

    
8185
    env.update(_BuildInstanceHookEnv(
8186
      name=self.op.instance_name,
8187
      primary_node=self.op.pnode,
8188
      secondary_nodes=self.secondaries,
8189
      status=self.op.start,
8190
      os_type=self.op.os_type,
8191
      memory=self.be_full[constants.BE_MEMORY],
8192
      vcpus=self.be_full[constants.BE_VCPUS],
8193
      nics=_NICListToTuple(self, self.nics),
8194
      disk_template=self.op.disk_template,
8195
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8196
             for d in self.disks],
8197
      bep=self.be_full,
8198
      hvp=self.hv_full,
8199
      hypervisor_name=self.op.hypervisor,
8200
      tags=self.op.tags,
8201
    ))
8202

    
8203
    return env
8204

    
8205
  def BuildHooksNodes(self):
8206
    """Build hooks nodes.
8207

8208
    """
8209
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8210
    return nl, nl
8211

    
8212
  def _ReadExportInfo(self):
8213
    """Reads the export information from disk.
8214

8215
    It will override the opcode source node and path with the actual
8216
    information, if these two were not specified before.
8217

8218
    @return: the export information
8219

8220
    """
8221
    assert self.op.mode == constants.INSTANCE_IMPORT
8222

    
8223
    src_node = self.op.src_node
8224
    src_path = self.op.src_path
8225

    
8226
    if src_node is None:
8227
      locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8228
      exp_list = self.rpc.call_export_list(locked_nodes)
8229
      found = False
8230
      for node in exp_list:
8231
        if exp_list[node].fail_msg:
8232
          continue
8233
        if src_path in exp_list[node].payload:
8234
          found = True
8235
          self.op.src_node = src_node = node
8236
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8237
                                                       src_path)
8238
          break
8239
      if not found:
8240
        raise errors.OpPrereqError("No export found for relative path %s" %
8241
                                    src_path, errors.ECODE_INVAL)
8242

    
8243
    _CheckNodeOnline(self, src_node)
8244
    result = self.rpc.call_export_info(src_node, src_path)
8245
    result.Raise("No export or invalid export found in dir %s" % src_path)
8246

    
8247
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8248
    if not export_info.has_section(constants.INISECT_EXP):
8249
      raise errors.ProgrammerError("Corrupted export config",
8250
                                   errors.ECODE_ENVIRON)
8251

    
8252
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8253
    if (int(ei_version) != constants.EXPORT_VERSION):
8254
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8255
                                 (ei_version, constants.EXPORT_VERSION),
8256
                                 errors.ECODE_ENVIRON)
8257
    return export_info
8258

    
8259
  def _ReadExportParams(self, einfo):
8260
    """Use export parameters as defaults.
8261

8262
    If the opcode doesn't specify (i.e. override) some instance
8263
    parameters, try to use them from the export information, if
8264
    that declares them.
8265

8266
    """
8267
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8268

    
8269
    if self.op.disk_template is None:
8270
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8271
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8272
                                          "disk_template")
8273
      else:
8274
        raise errors.OpPrereqError("No disk template specified and the export"
8275
                                   " is missing the disk_template information",
8276
                                   errors.ECODE_INVAL)
8277

    
8278
    if not self.op.disks:
8279
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
8280
        disks = []
8281
        # TODO: import the disk iv_name too
8282
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8283
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8284
          disks.append({constants.IDISK_SIZE: disk_sz})
8285
        self.op.disks = disks
8286
      else:
8287
        raise errors.OpPrereqError("No disk info specified and the export"
8288
                                   " is missing the disk information",
8289
                                   errors.ECODE_INVAL)
8290

    
8291
    if (not self.op.nics and
8292
        einfo.has_option(constants.INISECT_INS, "nic_count")):
8293
      nics = []
8294
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8295
        ndict = {}
8296
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8297
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8298
          ndict[name] = v
8299
        nics.append(ndict)
8300
      self.op.nics = nics
8301

    
8302
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8303
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8304

    
8305
    if (self.op.hypervisor is None and
8306
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8307
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8308

    
8309
    if einfo.has_section(constants.INISECT_HYP):
8310
      # use the export parameters but do not override the ones
8311
      # specified by the user
8312
      for name, value in einfo.items(constants.INISECT_HYP):
8313
        if name not in self.op.hvparams:
8314
          self.op.hvparams[name] = value
8315

    
8316
    if einfo.has_section(constants.INISECT_BEP):
8317
      # use the parameters, without overriding
8318
      for name, value in einfo.items(constants.INISECT_BEP):
8319
        if name not in self.op.beparams:
8320
          self.op.beparams[name] = value
8321
    else:
8322
      # try to read the parameters old style, from the main section
8323
      for name in constants.BES_PARAMETERS:
8324
        if (name not in self.op.beparams and
8325
            einfo.has_option(constants.INISECT_INS, name)):
8326
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8327

    
8328
    if einfo.has_section(constants.INISECT_OSP):
8329
      # use the parameters, without overriding
8330
      for name, value in einfo.items(constants.INISECT_OSP):
8331
        if name not in self.op.osparams:
8332
          self.op.osparams[name] = value
8333

    
8334
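  # A rough sketch of the resulting mapping (key names as read above, concrete
  # values invented): an [instance] section containing
  #
  #   disk_template = plain
  #   disk_count = 2
  #   disk0_size = 10240
  #   disk1_size = 2048
  #   tags = web production
  #
  # yields self.op.disk_template == "plain", self.op.disks == two size-only
  # dicts and self.op.tags == ["web", "production"], but only for fields the
  # opcode itself left unset; explicit opcode values always take precedence.
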
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

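  # Sketch of the effect when the opcode sets identify_defaults (values are
  # made up): with cluster defaults SimpleFillBE({}) == {"memory": 128,
  # "vcpus": 1} and an import carrying {"memory": 128, "vcpus": 4}, only
  # {"vcpus": 4} survives as an instance-level override; the memory setting is
  # dropped so the new instance keeps following the cluster default.
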
  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable-msg=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

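  # Rough sketch of the resulting path (directory names are hypothetical):
  # with a cluster file storage dir of /srv/ganeti/file-storage, an opcode
  # file_storage_dir of "web" and instance name "inst1.example.com", the
  # method above sets
  #
  #   self.instance_file_storage_dir ==
  #       "/srv/ganeti/file-storage/web/inst1.example.com"
  #
  # and simply omits the middle component when no file_storage_dir was given.
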
  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have fewer disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

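  # For reference, a sketch of one entry of self.disks as assembled above
  # (dictionary keys shown by their usual string values instead of the
  # constants.IDISK_* names; sizes in MiB, values invented):
  #
  #   {"size": 10240,       # IDISK_SIZE
  #    "mode": "rw",        # IDISK_MODE
  #    "vg": "xenvg",       # IDISK_VG, defaulting to the cluster VG
  #    "metavg": "xenvg"}   # IDISK_METAVG, defaulting to the data VG
  #
  # plus an IDISK_ADOPT entry when an existing volume or block device is being
  # adopted instead of created.
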
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj,
                                            None, None, False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


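# A rough sketch of the adoption rename done in LUInstanceCreate.Exec above
# (volume names are hypothetical): for a plain disk being adopted, the freshly
# generated logical_id could be ("xenvg", "1f2e3d4c.disk0") while the adoption
# data points at an existing LV "xenvg/old-data".  The blockdev_rename RPC then
# receives the pair
#
#   (disk with logical_id ("xenvg", "old-data"), ("xenvg", "1f2e3d4c.disk0"))
#
# i.e. the pre-existing volume is renamed to the generated name and from then
# on is treated exactly like a newly created disk.

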
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


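# The dictionary returned by _GetInstanceConsole above is the serialized form
# of an objects.InstanceConsole object; as a hedged sketch (the exact fields
# depend on the console kind reported by the hypervisor, and the values below
# are invented), an SSH-based console could serialize to something like
#
#   {"instance": "inst1.example.com", "kind": "ssh",
#    "host": "node1.example.com", "user": "root", "command": [...]}
#
# Client-side code rebuilds the object from this dict and decides how to
# attach to the console based on its kind.

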
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
    if owned_groups:
      groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
      if owned_groups != groups:
        raise errors.OpExecError("Node groups used by instance '%s' changed"
                                 " since lock was acquired, current list is %r,"
                                 " used to be '%s'" %
                                 (self.op.instance_name,
                                  utils.CommaJoin(groups),
                                  utils.CommaJoin(owned_groups)))

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

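  # Summary of the combinations accepted by CheckArguments above:
  #
  #   mode                              remote_node / iallocator
  #   REPLACE_DISK_PRI, _SEC, _AUTO     neither may be given
  #   REPLACE_DISK_CHG                  exactly one of the two is required
  #
  # e.g. TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG, None, None)
  # raises OpPrereqError, while passing a remote node *or* an iallocator name
  # (but not both) is accepted.
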
  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = \
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
           for node_name in touched_nodes)

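  # Node roles as assigned by _CheckPrereq2 above, for reference:
  #
  #   mode                target_node        other_node   new_node
  #   REPLACE_DISK_PRI    primary            secondary    -
  #   REPLACE_DISK_SEC    secondary          primary      -
  #   REPLACE_DISK_CHG    old secondary      primary      chosen secondary
  #   REPLACE_DISK_AUTO   node with faulty   the other    -
  #                       disks, if any      node
  #
  # target_node is where new storage is created (or, for _CHG, the node being
  # replaced); other_node is only checked for consistency.
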
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
      assert set(owned_locks) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_locks, self.node_secondary_ip.keys()))

      owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
      assert list(owned_locks) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    if __debug__:
      # Verify owned locks
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_locks) or
              (not self.early_release and not (set(owned_locks) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_locks, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]))
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vg_meta, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

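  # A short illustrative note on the storage layout handled below (the name
  # format is an assumption based on _GenerateUniqueNames): every DRBD8 disk
  # has a data and a metadata LV as children.  _CreateNewStorage above
  # requests fresh names of the form "<uuid>.disk<N>_data" /
  # "<uuid>.disk<N>_meta" and creates the new LVs in the same volume groups as
  # the existing children; _ExecDrbd8DiskOnly then swaps them in under the old
  # names and _RemoveOldStorage deletes the renamed originals.
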
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9636
                               hint=("cleanup manually the unused logical"
9637
                                     "volumes"))
9638
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9639

    
9640
    cstep = 5
9641
    if self.early_release:
9642
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9643
      cstep += 1
9644
      self._RemoveOldStorage(self.target_node, iv_names)
9645
      # WARNING: we release both node locks here, do not do other RPCs
9646
      # than WaitForSync to the primary node
9647
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9648
                    names=[self.target_node, self.other_node])
9649

    
9650
    # Wait for sync
9651
    # This can fail as the old devices are degraded and _WaitForSync
9652
    # does a combined result over all disks, so we don't check its return value
9653
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9654
    cstep += 1
9655
    _WaitForSync(self.lu, self.instance)
9656

    
9657
    # Check all devices manually
9658
    self._CheckDevices(self.instance.primary_node, iv_names)
9659

    
9660
    # Step: remove old storage
9661
    if not self.early_release:
9662
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9663
      cstep += 1
9664
      self._RemoveOldStorage(self.target_node, iv_names)
9665

    
9666
  def _ExecDrbd8Secondary(self, feedback_fn):
9667
    """Replace the secondary node for DRBD 8.
9668

9669
    The algorithm for replace is quite complicated:
9670
      - for all disks of the instance:
9671
        - create new LVs on the new node with same names
9672
        - shutdown the drbd device on the old secondary
9673
        - disconnect the drbd network on the primary
9674
        - create the drbd device on the new secondary
9675
        - network attach the drbd on the primary, using an artifice:
9676
          the drbd code for Attach() will connect to the network if it
9677
          finds a device which is connected to the good local disks but
9678
          not network enabled
9679
      - wait for sync across all devices
9680
      - remove all disks from the old secondary
9681

9682
    Failures are not very well handled.
9683

9684
    """
9685
    steps_total = 6
9686

    
9687
    # Step: check device activation
9688
    self.lu.LogStep(1, steps_total, "Check device existence")
9689
    self._CheckDisksExistence([self.instance.primary_node])
9690
    self._CheckVolumeGroup([self.instance.primary_node])
9691

    
9692
    # Step: check other node consistency
9693
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9694
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9695

    
9696
    # Step: create new storage
9697
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9698
    for idx, dev in enumerate(self.instance.disks):
9699
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9700
                      (self.new_node, idx))
9701
      # we pass force_create=True to force LVM creation
9702
      for new_lv in dev.children:
9703
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9704
                        _GetInstanceInfoText(self.instance), False)
9705

    
9706
    # Step 4: dbrd minors and drbd setups changes
9707
    # after this, we must manually remove the drbd minors on both the
9708
    # error and the success paths
9709
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9710
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9711
                                         for dev in self.instance.disks],
9712
                                        self.instance.name)
9713
    logging.debug("Allocated minors %r", minors)
9714

    
9715
    iv_names = {}
9716
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9717
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9718
                      (self.new_node, idx))
9719
      # create new devices on new_node; note that we create two IDs:
9720
      # one without port, so the drbd will be activated without
9721
      # networking information on the new node at this stage, and one
9722
      # with network, for the latter activation in step 4
9723
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9724
      if self.instance.primary_node == o_node1:
9725
        p_minor = o_minor1
9726
      else:
9727
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9728
        p_minor = o_minor2
9729

    
9730
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9731
                      p_minor, new_minor, o_secret)
9732
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9733
                    p_minor, new_minor, o_secret)
9734

    
9735
      iv_names[idx] = (dev, dev.children, new_net_id)
9736
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9737
                    new_net_id)
9738
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9739
                              logical_id=new_alone_id,
9740
                              children=dev.children,
9741
                              size=dev.size)
9742
      try:
9743
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9744
                              _GetInstanceInfoText(self.instance), False)
9745
      except errors.GenericError:
9746
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9747
        raise
9748

    
9749
    # We have new devices, shutdown the drbd on the old secondary
9750
    for idx, dev in enumerate(self.instance.disks):
9751
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9752
      self.cfg.SetDiskID(dev, self.target_node)
9753
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9754
      if msg:
9755
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9756
                           "node: %s" % (idx, msg),
9757
                           hint=("Please cleanup this device manually as"
9758
                                 " soon as possible"))
9759

    
9760
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9761
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9762
                                               self.node_secondary_ip,
9763
                                               self.instance.disks)\
9764
                                              [self.instance.primary_node]
9765

    
9766
    msg = result.fail_msg
9767
    if msg:
9768
      # detaches didn't succeed (unlikely)
9769
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9770
      raise errors.OpExecError("Can't detach the disks from the network on"
9771
                               " old node: %s" % (msg,))
9772

    
9773
    # if we managed to detach at least one, we update all the disks of
9774
    # the instance to point to the new secondary
9775
    self.lu.LogInfo("Updating instance configuration")
9776
    for dev, _, new_logical_id in iv_names.itervalues():
9777
      dev.logical_id = new_logical_id
9778
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9779

    
9780
    self.cfg.Update(self.instance, feedback_fn)
9781

    
9782
    # and now perform the drbd attach
9783
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9784
                    " (standalone => connected)")
9785
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9786
                                            self.new_node],
9787
                                           self.node_secondary_ip,
9788
                                           self.instance.disks,
9789
                                           self.instance.name,
9790
                                           False)
9791
    for to_node, to_result in result.items():
9792
      msg = to_result.fail_msg
9793
      if msg:
9794
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9795
                           to_node, msg,
9796
                           hint=("please do a gnt-instance info to see the"
9797
                                 " status of disks"))
9798
    cstep = 5
9799
    if self.early_release:
9800
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9801
      cstep += 1
9802
      self._RemoveOldStorage(self.target_node, iv_names)
9803
      # WARNING: we release all node locks here, do not do other RPCs
9804
      # than WaitForSync to the primary node
9805
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9806
                    names=[self.instance.primary_node,
9807
                           self.target_node,
9808
                           self.new_node])
9809

    
9810
    # Wait for sync
9811
    # This can fail as the old devices are degraded and _WaitForSync
9812
    # does a combined result over all disks, so we don't check its return value
9813
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9814
    cstep += 1
9815
    _WaitForSync(self.lu, self.instance)
9816

    
9817
    # Check all devices manually
9818
    self._CheckDevices(self.instance.primary_node, iv_names)
9819

    
9820
    # Step: remove old storage
9821
    if not self.early_release:
9822
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9823
      self._RemoveOldStorage(self.target_node, iv_names)
9824

    
9825

    
9826
class LURepairNodeStorage(NoHooksLU):
9827
  """Repairs the volume group on a node.
9828

9829
  """
9830
  REQ_BGL = False
9831

    
9832
  def CheckArguments(self):
9833
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9834

    
9835
    storage_type = self.op.storage_type
9836

    
9837
    if (constants.SO_FIX_CONSISTENCY not in
9838
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9839
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
9840
                                 " repaired" % storage_type,
9841
                                 errors.ECODE_INVAL)
9842

    
9843
  def ExpandNames(self):
9844
    self.needed_locks = {
9845
      locking.LEVEL_NODE: [self.op.node_name],
9846
      }
9847

    
9848
  def _CheckFaultyDisks(self, instance, node_name):
9849
    """Ensure faulty disks abort the opcode or at least warn."""
9850
    try:
9851
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9852
                                  node_name, True):
9853
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9854
                                   " node '%s'" % (instance.name, node_name),
9855
                                   errors.ECODE_STATE)
9856
    except errors.OpPrereqError, err:
9857
      if self.op.ignore_consistency:
9858
        self.proc.LogWarning(str(err.args[0]))
9859
      else:
9860
        raise
9861

    
9862
  def CheckPrereq(self):
9863
    """Check prerequisites.
9864

9865
    """
9866
    # Check whether any instance on this node has faulty disks
9867
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9868
      if not inst.admin_up:
9869
        continue
9870
      check_nodes = set(inst.all_nodes)
9871
      check_nodes.discard(self.op.node_name)
9872
      for inst_node_name in check_nodes:
9873
        self._CheckFaultyDisks(inst, inst_node_name)
9874

    
9875
  def Exec(self, feedback_fn):
9876
    feedback_fn("Repairing storage unit '%s' on %s ..." %
9877
                (self.op.name, self.op.node_name))
9878

    
9879
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9880
    result = self.rpc.call_storage_execute(self.op.node_name,
9881
                                           self.op.storage_type, st_args,
9882
                                           self.op.name,
9883
                                           constants.SO_FIX_CONSISTENCY)
9884
    result.Raise("Failed to repair storage unit '%s' on %s" %
9885
                 (self.op.name, self.op.node_name))
9886

    
9887

    
9888
class LUNodeEvacuate(NoHooksLU):
9889
  """Evacuates instances off a list of nodes.
9890

9891
  """
9892
  REQ_BGL = False
9893

    
9894
  def CheckArguments(self):
9895
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9896

    
9897
  def ExpandNames(self):
9898
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9899

    
9900
    if self.op.remote_node is not None:
9901
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9902
      assert self.op.remote_node
9903

    
9904
      if self.op.remote_node == self.op.node_name:
9905
        raise errors.OpPrereqError("Can not use evacuated node as a new"
9906
                                   " secondary node", errors.ECODE_INVAL)
9907

    
9908
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
9909
        raise errors.OpPrereqError("Without the use of an iallocator only"
9910
                                   " secondary instances can be evacuated",
9911
                                   errors.ECODE_INVAL)
9912

    
9913
    # Declare locks
9914
    self.share_locks = _ShareAll()
9915
    self.needed_locks = {
9916
      locking.LEVEL_INSTANCE: [],
9917
      locking.LEVEL_NODEGROUP: [],
9918
      locking.LEVEL_NODE: [],
9919
      }
9920

    
9921
    if self.op.remote_node is None:
9922
      # Iallocator will choose any node(s) in the same group
9923
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
9924
    else:
9925
      group_nodes = frozenset([self.op.remote_node])
9926

    
9927
    # Determine nodes to be locked
9928
    self.lock_nodes = set([self.op.node_name]) | group_nodes
9929

    
9930
  def _DetermineInstances(self):
9931
    """Builds list of instances to operate on.
9932

9933
    """
9934
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
9935

    
9936
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
9937
      # Primary instances only
9938
      inst_fn = _GetNodePrimaryInstances
9939
      assert self.op.remote_node is None, \
9940
        "Evacuating primary instances requires iallocator"
9941
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
9942
      # Secondary instances only
9943
      inst_fn = _GetNodeSecondaryInstances
9944
    else:
9945
      # All instances
9946
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
9947
      inst_fn = _GetNodeInstances
9948

    
9949
    return inst_fn(self.cfg, self.op.node_name)
9950

    
9951
  def DeclareLocks(self, level):
9952
    if level == locking.LEVEL_INSTANCE:
9953
      # Lock instances optimistically, needs verification once node and group
9954
      # locks have been acquired
9955
      self.needed_locks[locking.LEVEL_INSTANCE] = \
9956
        set(i.name for i in self._DetermineInstances())
9957

    
9958
    elif level == locking.LEVEL_NODEGROUP:
9959
      # Lock node groups optimistically, needs verification once nodes have
9960
      # been acquired
9961
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9962
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
9963

    
9964
    elif level == locking.LEVEL_NODE:
9965
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
9966

    
9967
  def CheckPrereq(self):
9968
    # Verify locks
9969
    owned_instances = self.glm.list_owned(locking.LEVEL_INSTANCE)
9970
    owned_nodes = self.glm.list_owned(locking.LEVEL_NODE)
9971
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
9972

    
9973
    assert owned_nodes == self.lock_nodes
9974

    
9975
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
9976
    if owned_groups != wanted_groups:
9977
      raise errors.OpExecError("Node groups changed since locks were acquired,"
9978
                               " current groups are '%s', used to be '%s'" %
9979
                               (utils.CommaJoin(wanted_groups),
9980
                                utils.CommaJoin(owned_groups)))
9981

    
9982
    # Determine affected instances
9983
    self.instances = self._DetermineInstances()
9984
    self.instance_names = [i.name for i in self.instances]
9985

    
9986
    if set(self.instance_names) != owned_instances:
9987
      raise errors.OpExecError("Instances on node '%s' changed since locks"
9988
                               " were acquired, current instances are '%s',"
9989
                               " used to be '%s'" %
9990
                               (self.op.node_name,
9991
                                utils.CommaJoin(self.instance_names),
9992
                                utils.CommaJoin(owned_instances)))
9993

    
9994
    if self.instance_names:
9995
      self.LogInfo("Evacuating instances from node '%s': %s",
9996
                   self.op.node_name,
9997
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
9998
    else:
9999
      self.LogInfo("No instances to evacuate from node '%s'",
10000
                   self.op.node_name)
10001

    
10002
    if self.op.remote_node is not None:
10003
      for i in self.instances:
10004
        if i.primary_node == self.op.remote_node:
10005
          raise errors.OpPrereqError("Node %s is the primary node of"
10006
                                     " instance %s, cannot use it as"
10007
                                     " secondary" %
10008
                                     (self.op.remote_node, i.name),
10009
                                     errors.ECODE_INVAL)
10010

    
10011
  def Exec(self, feedback_fn):
10012
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10013

    
10014
    if not self.instance_names:
10015
      # No instances to evacuate
10016
      jobs = []
10017

    
10018
    elif self.op.iallocator is not None:
10019
      # TODO: Implement relocation to other group
10020
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10021
                       evac_mode=self.op.mode,
10022
                       instances=list(self.instance_names))
10023

    
10024
      ial.Run(self.op.iallocator)
10025

    
10026
      if not ial.success:
10027
        raise errors.OpPrereqError("Can't compute node evacuation using"
10028
                                   " iallocator '%s': %s" %
10029
                                   (self.op.iallocator, ial.info),
10030
                                   errors.ECODE_NORES)
10031

    
10032
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10033

    
10034
    elif self.op.remote_node is not None:
10035
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10036
      jobs = [
10037
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10038
                                        remote_node=self.op.remote_node,
10039
                                        disks=[],
10040
                                        mode=constants.REPLACE_DISK_CHG,
10041
                                        early_release=self.op.early_release)]
10042
        for instance_name in self.instance_names
10043
        ]
10044

    
10045
    else:
10046
      raise errors.ProgrammerError("No iallocator or remote node")
10047

    
10048
    return ResultWithJobs(jobs)
10049

    
10050

    
10051
def _SetOpEarlyRelease(early_release, op):
10052
  """Sets C{early_release} flag on opcodes if available.
10053

10054
  """
10055
  try:
10056
    op.early_release = early_release
10057
  except AttributeError:
10058
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10059

    
10060
  return op
10061

    
10062

    
10063
def _NodeEvacDest(use_nodes, group, nodes):
10064
  """Returns group or nodes depending on caller's choice.
10065

10066
  """
10067
  if use_nodes:
10068
    return utils.CommaJoin(nodes)
10069
  else:
10070
    return group
10071

    
10072

    
10073
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10074
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10075

10076
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10077
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10078

10079
  @type lu: L{LogicalUnit}
10080
  @param lu: Logical unit instance
10081
  @type alloc_result: tuple/list
10082
  @param alloc_result: Result from iallocator
10083
  @type early_release: bool
10084
  @param early_release: Whether to release locks early if possible
10085
  @type use_nodes: bool
10086
  @param use_nodes: Whether to display node names instead of groups
10087

10088
  """
10089
  (moved, failed, jobs) = alloc_result
10090

    
10091
  if failed:
10092
    lu.LogWarning("Unable to evacuate instances %s",
10093
                  utils.CommaJoin("%s (%s)" % (name, reason)
10094
                                  for (name, reason) in failed))
10095

    
10096
  if moved:
10097
    lu.LogInfo("Instances to be moved: %s",
10098
               utils.CommaJoin("%s (to %s)" %
10099
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10100
                               for (name, group, nodes) in moved))
10101

    
10102
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10103
              map(opcodes.OpCode.LoadOpCode, ops))
10104
          for ops in jobs]
10105

    
10106

    
10107
class LUInstanceGrowDisk(LogicalUnit):
10108
  """Grow a disk of an instance.
10109

10110
  """
10111
  HPATH = "disk-grow"
10112
  HTYPE = constants.HTYPE_INSTANCE
10113
  REQ_BGL = False
10114

    
10115
  def ExpandNames(self):
10116
    self._ExpandAndLockInstance()
10117
    self.needed_locks[locking.LEVEL_NODE] = []
10118
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10119

    
10120
  def DeclareLocks(self, level):
10121
    if level == locking.LEVEL_NODE:
10122
      self._LockInstancesNodes()
10123

    
10124
  def BuildHooksEnv(self):
10125
    """Build hooks env.
10126

10127
    This runs on the master, the primary and all the secondaries.
10128

10129
    """
10130
    env = {
10131
      "DISK": self.op.disk,
10132
      "AMOUNT": self.op.amount,
10133
      }
10134
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10135
    return env
10136

    
10137
  def BuildHooksNodes(self):
10138
    """Build hooks nodes.
10139

10140
    """
10141
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10142
    return (nl, nl)
10143

    
10144
  def CheckPrereq(self):
10145
    """Check prerequisites.
10146

10147
    This checks that the instance is in the cluster.
10148

10149
    """
10150
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10151
    assert instance is not None, \
10152
      "Cannot retrieve locked instance %s" % self.op.instance_name
10153
    nodenames = list(instance.all_nodes)
10154
    for node in nodenames:
10155
      _CheckNodeOnline(self, node)
10156

    
10157
    self.instance = instance
10158

    
10159
    if instance.disk_template not in constants.DTS_GROWABLE:
10160
      raise errors.OpPrereqError("Instance's disk layout does not support"
10161
                                 " growing", errors.ECODE_INVAL)
10162

    
10163
    self.disk = instance.FindDisk(self.op.disk)
10164

    
10165
    if instance.disk_template not in (constants.DT_FILE,
10166
                                      constants.DT_SHARED_FILE):
10167
      # TODO: check the free disk space for file, when that feature will be
10168
      # supported
10169
      _CheckNodesFreeDiskPerVG(self, nodenames,
10170
                               self.disk.ComputeGrowth(self.op.amount))
10171

    
10172
  def Exec(self, feedback_fn):
10173
    """Execute disk grow.
10174

10175
    """
10176
    instance = self.instance
10177
    disk = self.disk
10178

    
10179
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10180
    if not disks_ok:
10181
      raise errors.OpExecError("Cannot activate block device to grow")
10182

    
10183
    # First run all grow ops in dry-run mode
10184
    for node in instance.all_nodes:
10185
      self.cfg.SetDiskID(disk, node)
10186
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10187
      result.Raise("Grow request failed to node %s" % node)
10188

    
10189
    # We know that (as far as we can test) operations across different
10190
    # nodes will succeed, time to run it for real
10191
    for node in instance.all_nodes:
10192
      self.cfg.SetDiskID(disk, node)
10193
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10194
      result.Raise("Grow request failed to node %s" % node)
10195

    
10196
      # TODO: Rewrite code to work properly
10197
      # DRBD goes into sync mode for a short amount of time after executing the
10198
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10199
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10200
      # time is a work-around.
10201
      time.sleep(5)
10202

    
10203
    disk.RecordGrow(self.op.amount)
10204
    self.cfg.Update(instance, feedback_fn)
10205
    if self.op.wait_for_sync:
10206
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10207
      if disk_abort:
10208
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10209
                             " status; please check the instance")
10210
      if not instance.admin_up:
10211
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10212
    elif not instance.admin_up:
10213
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10214
                           " not supposed to be running because no wait for"
10215
                           " sync mode was requested")
10216

    
10217

    
10218
class LUInstanceQueryData(NoHooksLU):
10219
  """Query runtime instance data.
10220

10221
  """
10222
  REQ_BGL = False
10223

    
10224
  def ExpandNames(self):
10225
    self.needed_locks = {}
10226

    
10227
    # Use locking if requested or when non-static information is wanted
10228
    if not (self.op.static or self.op.use_locking):
10229
      self.LogWarning("Non-static data requested, locks need to be acquired")
10230
      self.op.use_locking = True
10231

    
10232
    if self.op.instances or not self.op.use_locking:
10233
      # Expand instance names right here
10234
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10235
    else:
10236
      # Will use acquired locks
10237
      self.wanted_names = None
10238

    
10239
    if self.op.use_locking:
10240
      self.share_locks = _ShareAll()
10241

    
10242
      if self.wanted_names is None:
10243
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10244
      else:
10245
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10246

    
10247
      self.needed_locks[locking.LEVEL_NODE] = []
10248
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10249

    
10250
  def DeclareLocks(self, level):
10251
    if self.op.use_locking and level == locking.LEVEL_NODE:
10252
      self._LockInstancesNodes()
10253

    
10254
  def CheckPrereq(self):
10255
    """Check prerequisites.
10256

10257
    This only checks the optional instance list against the existing names.
10258

10259
    """
10260
    if self.wanted_names is None:
10261
      assert self.op.use_locking, "Locking was not used"
10262
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
10263

    
10264
    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
10265
                             for name in self.wanted_names]
10266

    
10267
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10268
    """Returns the status of a block device
10269

10270
    """
10271
    if self.op.static or not node:
10272
      return None
10273

    
10274
    self.cfg.SetDiskID(dev, node)
10275

    
10276
    result = self.rpc.call_blockdev_find(node, dev)
10277
    if result.offline:
10278
      return None
10279

    
10280
    result.Raise("Can't compute disk status for %s" % instance_name)
10281

    
10282
    status = result.payload
10283
    if status is None:
10284
      return None
10285

    
10286
    return (status.dev_path, status.major, status.minor,
10287
            status.sync_percent, status.estimated_time,
10288
            status.is_degraded, status.ldisk_status)
10289

    
10290
  def _ComputeDiskStatus(self, instance, snode, dev):
10291
    """Compute block device status.
10292

10293
    """
10294
    if dev.dev_type in constants.LDS_DRBD:
10295
      # we change the snode then (otherwise we use the one passed in)
10296
      if dev.logical_id[0] == instance.primary_node:
10297
        snode = dev.logical_id[1]
10298
      else:
10299
        snode = dev.logical_id[0]
10300

    
10301
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10302
                                              instance.name, dev)
10303
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10304

    
10305
    if dev.children:
10306
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10307
                                        instance, snode),
10308
                         dev.children)
10309
    else:
10310
      dev_children = []
10311

    
10312
    return {
10313
      "iv_name": dev.iv_name,
10314
      "dev_type": dev.dev_type,
10315
      "logical_id": dev.logical_id,
10316
      "physical_id": dev.physical_id,
10317
      "pstatus": dev_pstatus,
10318
      "sstatus": dev_sstatus,
10319
      "children": dev_children,
10320
      "mode": dev.mode,
10321
      "size": dev.size,
10322
      }
10323

    
10324
  def Exec(self, feedback_fn):
10325
    """Gather and return data"""
10326
    result = {}
10327

    
10328
    cluster = self.cfg.GetClusterInfo()
10329

    
10330
    for instance in self.wanted_instances:
10331
      pnode = self.cfg.GetNodeInfo(instance.primary_node)
10332

    
10333
      if self.op.static or pnode.offline:
10334
        remote_state = None
10335
        if pnode.offline:
10336
          self.LogWarning("Primary node %s is marked offline, returning static"
10337
                          " information only for instance %s" %
10338
                          (pnode.name, instance.name))
10339
      else:
10340
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10341
                                                  instance.name,
10342
                                                  instance.hypervisor)
10343
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10344
        remote_info = remote_info.payload
10345
        if remote_info and "state" in remote_info:
10346
          remote_state = "up"
10347
        else:
10348
          remote_state = "down"
10349

    
10350
      if instance.admin_up:
10351
        config_state = "up"
10352
      else:
10353
        config_state = "down"
10354

    
10355
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10356
                  instance.disks)
10357

    
10358
      result[instance.name] = {
10359
        "name": instance.name,
10360
        "config_state": config_state,
10361
        "run_state": remote_state,
10362
        "pnode": instance.primary_node,
10363
        "snodes": instance.secondary_nodes,
10364
        "os": instance.os,
10365
        # this happens to be the same format used for hooks
10366
        "nics": _NICListToTuple(self, instance.nics),
10367
        "disk_template": instance.disk_template,
10368
        "disks": disks,
10369
        "hypervisor": instance.hypervisor,
10370
        "network_port": instance.network_port,
10371
        "hv_instance": instance.hvparams,
10372
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10373
        "be_instance": instance.beparams,
10374
        "be_actual": cluster.FillBE(instance),
10375
        "os_instance": instance.osparams,
10376
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10377
        "serial_no": instance.serial_no,
10378
        "mtime": instance.mtime,
10379
        "ctime": instance.ctime,
10380
        "uuid": instance.uuid,
10381
        }
10382

    
10383
    return result
10384

    
10385

    
10386
class LUInstanceSetParams(LogicalUnit):
10387
  """Modifies an instances's parameters.
10388

10389
  """
10390
  HPATH = "instance-modify"
10391
  HTYPE = constants.HTYPE_INSTANCE
10392
  REQ_BGL = False
10393

    
10394
  def CheckArguments(self):
10395
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10396
            self.op.hvparams or self.op.beparams or self.op.os_name):
10397
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10398

    
10399
    if self.op.hvparams:
10400
      _CheckGlobalHvParams(self.op.hvparams)
10401

    
10402
    # Disk validation
10403
    disk_addremove = 0
10404
    for disk_op, disk_dict in self.op.disks:
10405
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10406
      if disk_op == constants.DDM_REMOVE:
10407
        disk_addremove += 1
10408
        continue
10409
      elif disk_op == constants.DDM_ADD:
10410
        disk_addremove += 1
10411
      else:
10412
        if not isinstance(disk_op, int):
10413
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10414
        if not isinstance(disk_dict, dict):
10415
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10416
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10417

    
10418
      if disk_op == constants.DDM_ADD:
10419
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10420
        if mode not in constants.DISK_ACCESS_SET:
10421
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10422
                                     errors.ECODE_INVAL)
10423
        size = disk_dict.get(constants.IDISK_SIZE, None)
10424
        if size is None:
10425
          raise errors.OpPrereqError("Required disk parameter size missing",
10426
                                     errors.ECODE_INVAL)
10427
        try:
10428
          size = int(size)
10429
        except (TypeError, ValueError), err:
10430
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10431
                                     str(err), errors.ECODE_INVAL)
10432
        disk_dict[constants.IDISK_SIZE] = size
10433
      else:
10434
        # modification of disk
10435
        if constants.IDISK_SIZE in disk_dict:
10436
          raise errors.OpPrereqError("Disk size change not possible, use"
10437
                                     " grow-disk", errors.ECODE_INVAL)
10438

    
10439
    if disk_addremove > 1:
10440
      raise errors.OpPrereqError("Only one disk add or remove operation"
10441
                                 " supported at a time", errors.ECODE_INVAL)
10442

    
10443
    if self.op.disks and self.op.disk_template is not None:
10444
      raise errors.OpPrereqError("Disk template conversion and other disk"
10445
                                 " changes not supported at the same time",
10446
                                 errors.ECODE_INVAL)
10447

    
10448
    if (self.op.disk_template and
10449
        self.op.disk_template in constants.DTS_INT_MIRROR and
10450
        self.op.remote_node is None):
10451
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10452
                                 " one requires specifying a secondary node",
10453
                                 errors.ECODE_INVAL)
10454

    
10455
    # NIC validation
10456
    nic_addremove = 0
10457
    for nic_op, nic_dict in self.op.nics:
10458
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10459
      if nic_op == constants.DDM_REMOVE:
10460
        nic_addremove += 1
10461
        continue
10462
      elif nic_op == constants.DDM_ADD:
10463
        nic_addremove += 1
10464
      else:
10465
        if not isinstance(nic_op, int):
10466
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10467
        if not isinstance(nic_dict, dict):
10468
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10469
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10470

    
10471
      # nic_dict should be a dict
10472
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10473
      if nic_ip is not None:
10474
        if nic_ip.lower() == constants.VALUE_NONE:
10475
          nic_dict[constants.INIC_IP] = None
10476
        else:
10477
          if not netutils.IPAddress.IsValid(nic_ip):
10478
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10479
                                       errors.ECODE_INVAL)
10480

    
10481
      nic_bridge = nic_dict.get("bridge", None)
10482
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10483
      if nic_bridge and nic_link:
10484
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10485
                                   " at the same time", errors.ECODE_INVAL)
10486
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10487
        nic_dict["bridge"] = None
10488
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10489
        nic_dict[constants.INIC_LINK] = None
10490

    
10491
      if nic_op == constants.DDM_ADD:
10492
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10493
        if nic_mac is None:
10494
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10495

    
10496
      if constants.INIC_MAC in nic_dict:
10497
        nic_mac = nic_dict[constants.INIC_MAC]
10498
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10499
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10500

    
10501
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10502
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10503
                                     " modifying an existing nic",
10504
                                     errors.ECODE_INVAL)
10505

    
10506
    if nic_addremove > 1:
10507
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10508
                                 " supported at a time", errors.ECODE_INVAL)
10509

    
10510
  def ExpandNames(self):
10511
    self._ExpandAndLockInstance()
10512
    self.needed_locks[locking.LEVEL_NODE] = []
10513
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10514

    
10515
  def DeclareLocks(self, level):
10516
    if level == locking.LEVEL_NODE:
10517
      self._LockInstancesNodes()
10518
      if self.op.disk_template and self.op.remote_node:
10519
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10520
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10521

    
10522
  def BuildHooksEnv(self):
10523
    """Build hooks env.
10524

10525
    This runs on the master, primary and secondaries.
10526

10527
    """
10528
    args = dict()
10529
    if constants.BE_MEMORY in self.be_new:
10530
      args["memory"] = self.be_new[constants.BE_MEMORY]
10531
    if constants.BE_VCPUS in self.be_new:
10532
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10533
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10534
    # information at all.
10535
    if self.op.nics:
10536
      args["nics"] = []
10537
      nic_override = dict(self.op.nics)
10538
      for idx, nic in enumerate(self.instance.nics):
10539
        if idx in nic_override:
10540
          this_nic_override = nic_override[idx]
10541
        else:
10542
          this_nic_override = {}
10543
        if constants.INIC_IP in this_nic_override:
10544
          ip = this_nic_override[constants.INIC_IP]
10545
        else:
10546
          ip = nic.ip
10547
        if constants.INIC_MAC in this_nic_override:
10548
          mac = this_nic_override[constants.INIC_MAC]
10549
        else:
10550
          mac = nic.mac
10551
        if idx in self.nic_pnew:
10552
          nicparams = self.nic_pnew[idx]
10553
        else:
10554
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10555
        mode = nicparams[constants.NIC_MODE]
10556
        link = nicparams[constants.NIC_LINK]
10557
        args["nics"].append((ip, mac, mode, link))
10558
      if constants.DDM_ADD in nic_override:
10559
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10560
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10561
        nicparams = self.nic_pnew[constants.DDM_ADD]
10562
        mode = nicparams[constants.NIC_MODE]
10563
        link = nicparams[constants.NIC_LINK]
10564
        args["nics"].append((ip, mac, mode, link))
10565
      elif constants.DDM_REMOVE in nic_override:
10566
        del args["nics"][-1]
10567

    
10568
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10569
    if self.op.disk_template:
10570
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10571

    
10572
    return env
10573

    
10574
  def BuildHooksNodes(self):
10575
    """Build hooks nodes.
10576

10577
    """
10578
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10579
    return (nl, nl)
10580

    
10581
  def CheckPrereq(self):
10582
    """Check prerequisites.
10583

10584
    This only checks the instance list against the existing names.
10585

10586
    """
10587
    # checking the new params on the primary/secondary nodes
10588

    
10589
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10590
    cluster = self.cluster = self.cfg.GetClusterInfo()
10591
    assert self.instance is not None, \
10592
      "Cannot retrieve locked instance %s" % self.op.instance_name
10593
    pnode = instance.primary_node
10594
    nodelist = list(instance.all_nodes)
10595

    
10596
    # OS change
10597
    if self.op.os_name and not self.op.force:
10598
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10599
                      self.op.force_variant)
10600
      instance_os = self.op.os_name
10601
    else:
10602
      instance_os = instance.os
10603

    
10604
    if self.op.disk_template:
10605
      if instance.disk_template == self.op.disk_template:
10606
        raise errors.OpPrereqError("Instance already has disk template %s" %
10607
                                   instance.disk_template, errors.ECODE_INVAL)
10608

    
10609
      if (instance.disk_template,
10610
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10611
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10612
                                   " %s to %s" % (instance.disk_template,
10613
                                                  self.op.disk_template),
10614
                                   errors.ECODE_INVAL)
10615
      _CheckInstanceDown(self, instance, "cannot change disk template")
10616
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10617
        if self.op.remote_node == pnode:
10618
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10619
                                     " as the primary node of the instance" %
10620
                                     self.op.remote_node, errors.ECODE_STATE)
10621
        _CheckNodeOnline(self, self.op.remote_node)
10622
        _CheckNodeNotDrained(self, self.op.remote_node)
10623
        # FIXME: here we assume that the old instance type is DT_PLAIN
10624
        assert instance.disk_template == constants.DT_PLAIN
10625
        disks = [{constants.IDISK_SIZE: d.size,
10626
                  constants.IDISK_VG: d.logical_id[0]}
10627
                 for d in instance.disks]
10628
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10629
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10630

    
10631
    # hvparams processing
10632
    if self.op.hvparams:
10633
      hv_type = instance.hypervisor
10634
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10635
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10636
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10637

    
10638
      # local check
10639
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10640
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10641
      self.hv_new = hv_new # the new actual values
10642
      self.hv_inst = i_hvdict # the new dict (without defaults)
10643
    else:
10644
      self.hv_new = self.hv_inst = {}
10645

    
10646
    # beparams processing
10647
    if self.op.beparams:
10648
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10649
                                   use_none=True)
10650
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10651
      be_new = cluster.SimpleFillBE(i_bedict)
10652
      self.be_new = be_new # the new actual values
10653
      self.be_inst = i_bedict # the new dict (without defaults)
10654
    else:
10655
      self.be_new = self.be_inst = {}
10656
    be_old = cluster.FillBE(instance)
10657

    
10658
    # osparams processing
10659
    if self.op.osparams:
10660
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10661
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10662
      self.os_inst = i_osdict # the new dict (without defaults)
10663
    else:
10664
      self.os_inst = {}
10665

    
10666
    self.warn = []
10667

    
10668
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10669
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10670
      mem_check_list = [pnode]
10671
      if be_new[constants.BE_AUTO_BALANCE]:
10672
        # either we changed auto_balance to yes or it was from before
10673
        mem_check_list.extend(instance.secondary_nodes)
10674
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10675
                                                  instance.hypervisor)
10676
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10677
                                         instance.hypervisor)
10678
      pninfo = nodeinfo[pnode]
10679
      msg = pninfo.fail_msg
10680
      if msg:
10681
        # Assume the primary node is unreachable and go ahead
10682
        self.warn.append("Can't get info from primary node %s: %s" %
10683
                         (pnode,  msg))
10684
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
10685
        self.warn.append("Node data from primary node %s doesn't contain"
10686
                         " free memory information" % pnode)
10687
      elif instance_info.fail_msg:
10688
        self.warn.append("Can't get instance runtime information: %s" %
10689
                        instance_info.fail_msg)
10690
      else:
10691
        if instance_info.payload:
10692
          current_mem = int(instance_info.payload["memory"])
10693
        else:
10694
          # Assume instance not running
10695
          # (there is a slight race condition here, but it's not very probable,
10696
          # and we have no other way to check)
10697
          current_mem = 0
10698
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10699
                    pninfo.payload["memory_free"])
10700
        if miss_mem > 0:
10701
          raise errors.OpPrereqError("This change will prevent the instance"
10702
                                     " from starting, due to %d MB of memory"
10703
                                     " missing on its primary node" % miss_mem,
10704
                                     errors.ECODE_NORES)
10705

    
10706
      if be_new[constants.BE_AUTO_BALANCE]:
10707
        for node, nres in nodeinfo.items():
10708
          if node not in instance.secondary_nodes:
10709
            continue
10710
          nres.Raise("Can't get info from secondary node %s" % node,
10711
                     prereq=True, ecode=errors.ECODE_STATE)
10712
          if not isinstance(nres.payload.get("memory_free", None), int):
10713
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10714
                                       " memory information" % node,
10715
                                       errors.ECODE_STATE)
10716
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10717
            raise errors.OpPrereqError("This change will prevent the instance"
10718
                                       " from failover to its secondary node"
10719
                                       " %s, due to not enough memory" % node,
10720
                                       errors.ECODE_STATE)
10721

    
10722
    # NIC processing
10723
    self.nic_pnew = {}
10724
    self.nic_pinst = {}
10725
    for nic_op, nic_dict in self.op.nics:
10726
      if nic_op == constants.DDM_REMOVE:
10727
        if not instance.nics:
10728
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10729
                                     errors.ECODE_INVAL)
10730
        continue
10731
      if nic_op != constants.DDM_ADD:
10732
        # an existing nic
10733
        if not instance.nics:
10734
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10735
                                     " no NICs" % nic_op,
10736
                                     errors.ECODE_INVAL)
10737
        if nic_op < 0 or nic_op >= len(instance.nics):
10738
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10739
                                     " are 0 to %d" %
10740
                                     (nic_op, len(instance.nics) - 1),
10741
                                     errors.ECODE_INVAL)
10742
        old_nic_params = instance.nics[nic_op].nicparams
10743
        old_nic_ip = instance.nics[nic_op].ip
10744
      else:
10745
        old_nic_params = {}
10746
        old_nic_ip = None
10747

    
10748
      update_params_dict = dict([(key, nic_dict[key])
10749
                                 for key in constants.NICS_PARAMETERS
10750
                                 if key in nic_dict])
10751

    
10752
      if "bridge" in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError("Cannot set the nic ip to None"
                                     " on a routed nic", errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError("Cannot set the nic mac to None",
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)

    return

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
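    # On the primary node the data LV already exists (it is the current plain
    # volume), so only the DRBD meta device (children[1]) is created there;
    # the secondary node needs both the data and the meta LV.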
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please clean up manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
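    # each DRBD8 disk has (data LV, meta LV) as its children; keeping only the
    # data LV turns it back into a plain volume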
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                        constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

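    # disk template conversion: on any failure the DRBD minors reserved for
    # this instance are released before the exception is propagated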
    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

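  # Maps (current disk template, requested disk template) to the conversion
  # helper defined above. The dictionary is built while the class body is
  # executed, so the helpers are stored as plain functions and Exec() passes
  # the instance explicitly: self._DISK_CONVERSIONS[mode](self, feedback_fn).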
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

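      # Both the key name and the CA are signed with the cluster domain
      # secret, so the later OpBackupExport can verify that they were
      # generated by this cluster.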
      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
                                 " down first")

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal;
    # if we proceed, the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

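    # remember whether we have to activate the instance's disks ourselves, so
    # that the finally clause below can deactivate them again afterwards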
    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance,
                                                None, None, False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    # FIXME: Depends on side-effects of modifying the result of
    # C{cfg.GetAllNodesInfo}

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and end up split as
      a consequence of this change, and a list of instances that were
      previously split and this change does not fix.

    """
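    # Example: an instance mirrored over node1 and node2, both currently in
    # group A, is reported as newly split for changes=[("node2", "B")]; if
    # node2 already was in group B, the instance is instead reported as
    # previously (and still) split.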
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))


class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raise errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

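    # the hooks run on the master plus every node of the renamed group; the
    # master was popped from all_nodes above so it is not listed twice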
    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    if not self.op.iallocator:
      # Use default iallocator
      self.op.iallocator = self.cfg.GetDefaultIAllocator()

    if not self.op.iallocator:
      raise errors.OpPrereqError("No iallocator was specified, neither in the"
                                 " opcode nor as a cluster-wide default",
                                 errors.ECODE_INVAL)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.glm.list_owned(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated
      assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
    if owned_instances != wanted_instances:
      raise errors.OpPrereqError("Instances in node group to be evacuated (%s)"
                                 " changed since locks were acquired, wanted"
                                 " %s, have %s; retry the operation" %
                                 (self.group_uuid,
                                  utils.CommaJoin(wanted_instances),
                                  utils.CommaJoin(owned_instances)),
                                 errors.ECODE_STATE)

    # Get instance information
    self.instances = dict((name, self.cfg.GetInstanceInfo(name))
                          for name in owned_instances)

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
      if not owned_groups.issuperset(inst_groups):
        raise errors.OpPrereqError("Instance %s's node groups changed since"
                                   " locks were acquired, current groups"
                                   " are '%s', owning groups '%s'; retry the"
                                   " operation" %
                                   (instance_name,
                                    utils.CommaJoin(inst_groups),
                                    utils.CommaJoin(owned_groups)),
                                   errors.ECODE_STATE)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpExecError("There are no possible target groups")

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.glm.list_owned(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12150
                     instances=instances, target_groups=self.target_uuids)
12151

    
12152
    ial.Run(self.op.iallocator)
12153

    
12154
    if not ial.success:
12155
      raise errors.OpPrereqError("Can't compute group evacuation using"
12156
                                 " iallocator '%s': %s" %
12157
                                 (self.op.iallocator, ial.info),
12158
                                 errors.ECODE_NORES)
12159

    
12160
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12161

    
12162
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12163
                 len(jobs), self.op.group_name)
12164

    
12165
    return ResultWithJobs(jobs)
12166

    
12167

    
12168
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the (path, tag) pairs matching the search pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
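
  # Illustrative note (not part of the upstream code): each match is returned
  # as a (path, tag) pair, so a cluster tag "prod" that also exists on a
  # hypothetical instance would show up roughly as
  #   [("/cluster", "prod"), ("/instances/inst1.example.com", "prod")]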


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tags.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
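
  # Illustrative note (not part of the upstream code): with repeat=3 the branch
  # above runs the delay three times and logs "Test delay iteration 0/2",
  # "1/2" and "2/2", i.e. iterations are counted from zero.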


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all keys required by the chosen mode, as declared in
      _MODE_DATA)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng
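
  # Illustrative sketch (not part of the upstream code): the mapping built
  # above is keyed by node group UUID, e.g. (with made-up values)
  #   {"2f3e5c3a-...": {"name": "default", "alloc_policy": "preferred"}}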

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict mapping node names to dicts of config-derived attributes

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results
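
  # Illustrative sketch (not part of the upstream code): each value in the
  # returned dict holds only config-derived attributes, roughly
  #   "node1.example.com": {"tags": [], "primary_ip": "192.0.2.1",
  #                         "secondary_ip": "192.0.2.1", "offline": False,
  #                         "drained": False, "master_candidate": True,
  #                         "group": "<group UUID>", "master_capable": True,
  #                         "vm_capable": True}
  # with a hypothetical node name and addresses.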

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # assemble the dynamic node data
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
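
  # Illustrative sketch (not part of the upstream code): a single entry of the
  # returned dict, with made-up names and sizes, looks roughly like
  #   "inst1.example.com": {"admin_up": True, "vcpus": 1, "memory": 128,
  #                         "os": "debian-image", "disk_template": "drbd",
  #                         "nodes": ["node1", "node2"], "nics": [...],
  #                         "disks": [{"size": 1024, "mode": "rw"}],
  #                         "hypervisor": "xen-pvm", "tags": [],
  #                         "disk_space_total": ...}
  # where "disk_space_total" is filled in via _ComputeDiskSize.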

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group-change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable-msg=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_MEVAC:
      (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
       ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
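
  # Illustrative note (not part of the upstream code): each _MODE_DATA entry is
  # a (request-building method, keydata, result check) tuple. For example, for
  # IALLOCATOR_MODE_CHG_GROUP the constructor requires the keyword arguments
  # "instances" and "target_groups" (both lists of strings), _AddChangeGroup
  # builds the request, and the script output must satisfy _NEVAC_RESULT.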

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and store the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
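
  # Typical usage, as seen elsewhere in this module (e.g. LUGroupEvacuate.Exec;
  # sketch only, not part of the upstream code):
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
  #                    instances=instances, target_groups=target_uuids)
  #   ial.Run(self.op.iallocator)
  #   if not ial.success:
  #     raise errors.OpPrereqError(...)
  #   # on success, ial.result holds the validated allocator output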

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other result attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
                     constants.IALLOCATOR_MODE_MEVAC):
      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      if self.mode == constants.IALLOCATOR_MODE_RELOC:
        assert self.relocate_from is not None
        assert self.required_nodes == 1

        request_groups = fn(self.relocate_from)
        result_groups = fn(rdict["result"])

        if result_groups != request_groups:
          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                   " differ from original groups (%s)" %
                                   (utils.CommaJoin(result_groups),
                                    utils.CommaJoin(request_groups)))
      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
        request_groups = fn(self.evac_nodes)
        for (instance_name, secnode) in self.result:
          result_groups = fn([secnode])
          if result_groups != request_groups:
            raise errors.OpExecError("Iallocator returned new secondary node"
                                     " '%s' (group '%s') for instance '%s'"
                                     " which is not in original group '%s'" %
                                     (secnode, utils.CommaJoin(result_groups),
                                      instance_name,
                                      utils.CommaJoin(request_groups)))
      else:
        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
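
  # Illustrative sketch (not part of the upstream code), with made-up values:
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b", "node3": "uuid-a"}
  #   groups = {"uuid-a": {"name": "group1"}}
  #   IAllocator._NodesToGroups(node2group, groups,
  #                             ["node1", "node2", "node3", "nodeX"])
  # returns ["group1", "uuid-b"]: unknown nodes are skipped, a group missing
  # from the groups mapping falls back to its UUID, and the result is sorted.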


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)