1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62

    
63
import ganeti.masterd.instance # pylint: disable-msg=W0611
64

    
65

    
66
class ResultWithJobs:
67
  """Data container for LU results with jobs.
68

69
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71
  contained in the C{jobs} attribute and include the job IDs in the opcode
72
  result.
73

74
  """
75
  def __init__(self, jobs, **kwargs):
76
    """Initializes this class.
77

78
    Additional return values can be specified as keyword arguments.
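
    A minimal illustrative sketch of use from an LU's C{Exec} (the opcode
    and the extra keyword used here are made up for the example)::

      return ResultWithJobs([[opcodes.OpTestDelay(duration=0)]],
                            other_field=123)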
79

80
    @type jobs: list of lists of L{opcodes.OpCode}
81
    @param jobs: A list of lists of opcode objects
82

83
    """
84
    self.jobs = jobs
85
    self.other = kwargs
86

    
87

    
88
class LogicalUnit(object):
89
  """Logical Unit base class.
90

91
  Subclasses must follow these rules:
92
    - implement ExpandNames
93
    - implement CheckPrereq (except when tasklets are used)
94
    - implement Exec (except when tasklets are used)
95
    - implement BuildHooksEnv
96
    - implement BuildHooksNodes
97
    - redefine HPATH and HTYPE
98
    - optionally redefine their run requirements:
99
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
100

101
  Note that all commands require root permissions.
102

103
  @ivar dry_run_result: the value (if any) that will be returned to the caller
104
      in dry-run mode (signalled by opcode dry_run parameter)
105

106
  """
107
  HPATH = None
108
  HTYPE = None
109
  REQ_BGL = True
110

    
111
  def __init__(self, processor, op, context, rpc):
112
    """Constructor for LogicalUnit.
113

114
    This needs to be overridden in derived classes in order to check op
115
    validity.
116

117
    """
118
    self.proc = processor
119
    self.op = op
120
    self.cfg = context.cfg
121
    self.glm = context.glm
122
    self.context = context
123
    self.rpc = rpc
124
    # Dicts used to declare locking needs to mcpu
125
    self.needed_locks = None
126
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
127
    self.add_locks = {}
128
    self.remove_locks = {}
129
    # Used to force good behavior when calling helper functions
130
    self.recalculate_locks = {}
131
    # logging
132
    self.Log = processor.Log # pylint: disable-msg=C0103
133
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
134
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
135
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
136
    # support for dry-run
137
    self.dry_run_result = None
138
    # support for generic debug attribute
139
    if (not hasattr(self.op, "debug_level") or
140
        not isinstance(self.op.debug_level, int)):
141
      self.op.debug_level = 0
142

    
143
    # Tasklets
144
    self.tasklets = None
145

    
146
    # Validate opcode parameters and set defaults
147
    self.op.Validate(True)
148

    
149
    self.CheckArguments()
150

    
151
  def CheckArguments(self):
152
    """Check syntactic validity for the opcode arguments.
153

154
    This method is for doing a simple syntactic check and ensuring
155
    validity of opcode parameters, without any cluster-related
156
    checks. While the same can be accomplished in ExpandNames and/or
157
    CheckPrereq, doing these separately is better because:
158

159
      - ExpandNames is left as purely a lock-related function
160
      - CheckPrereq is run after we have acquired locks (and possibly
161
        waited for them)
162

163
    The function is allowed to change the self.op attribute so that
164
    later methods can no longer worry about missing parameters.
165

166
    """
167
    pass
168

    
169
  def ExpandNames(self):
170
    """Expand names for this LU.
171

172
    This method is called before starting to execute the opcode, and it should
173
    update all the parameters of the opcode to their canonical form (e.g. a
174
    short node name must be fully expanded after this method has successfully
175
    completed). This way locking, hooks, logging, etc. can work correctly.
176

177
    LUs which implement this method must also populate the self.needed_locks
178
    member, as a dict with lock levels as keys, and a list of needed lock names
179
    as values. Rules:
180

181
      - use an empty dict if you don't need any lock
182
      - if you don't need any lock at a particular level omit that level
183
      - don't put anything for the BGL level
184
      - if you want all locks at a level use locking.ALL_SET as a value
185

186
    If you need to share locks (rather than acquire them exclusively) at one
187
    level you can modify self.share_locks, setting a true value (usually 1) for
188
    that level. By default locks are not shared.
189

190
    This function can also define a list of tasklets, which then will be
191
    executed in order instead of the usual LU-level CheckPrereq and Exec
192
    functions, if those are not defined by the LU.
193

194
    Examples::
195

196
      # Acquire all nodes and one instance
197
      self.needed_locks = {
198
        locking.LEVEL_NODE: locking.ALL_SET,
199
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
200
      }
201
      # Acquire just two nodes
202
      self.needed_locks = {
203
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
204
      }
205
      # Acquire no locks
206
      self.needed_locks = {} # No, you can't leave it to the default value None
207

208
    """
209
    # The implementation of this method is mandatory only if the new LU is
210
    # concurrent, so that old LUs don't need to be changed all at the same
211
    # time.
212
    if self.REQ_BGL:
213
      self.needed_locks = {} # Exclusive LUs don't need locks.
214
    else:
215
      raise NotImplementedError
216

    
217
  def DeclareLocks(self, level):
218
    """Declare LU locking needs for a level
219

220
    While most LUs can just declare their locking needs at ExpandNames time,
221
    sometimes there's the need to calculate some locks after having acquired
222
    the ones before. This function is called just before acquiring locks at a
223
    particular level, but after acquiring the ones at lower levels, and permits
224
    such calculations. It can be used to modify self.needed_locks, and by
225
    default it does nothing.
226

227
    This function is only called if you have something already set in
228
    self.needed_locks for the level.
229

230
    @param level: Locking level which is going to be locked
231
    @type level: member of ganeti.locking.LEVELS
232

233
    """
234

    
235
  def CheckPrereq(self):
236
    """Check prerequisites for this LU.
237

238
    This method should check that the prerequisites for the execution
239
    of this LU are fulfilled. It can do internode communication, but
240
    it should be idempotent - no cluster or system changes are
241
    allowed.
242

243
    The method should raise errors.OpPrereqError in case something is
244
    not fulfilled. Its return value is ignored.
245

246
    This method should also update all the parameters of the opcode to
247
    their canonical form if it hasn't been done by ExpandNames before.
248

249
    """
250
    if self.tasklets is not None:
251
      for (idx, tl) in enumerate(self.tasklets):
252
        logging.debug("Checking prerequisites for tasklet %s/%s",
253
                      idx + 1, len(self.tasklets))
254
        tl.CheckPrereq()
255
    else:
256
      pass
257

    
258
  def Exec(self, feedback_fn):
259
    """Execute the LU.
260

261
    This method should implement the actual work. It should raise
262
    errors.OpExecError for failures that are somewhat dealt with in
263
    code, or expected.
264

265
    """
266
    if self.tasklets is not None:
267
      for (idx, tl) in enumerate(self.tasklets):
268
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
269
        tl.Exec(feedback_fn)
270
    else:
271
      raise NotImplementedError
272

    
273
  def BuildHooksEnv(self):
274
    """Build hooks environment for this LU.
275

276
    @rtype: dict
277
    @return: Dictionary containing the environment that will be used for
278
      running the hooks for this LU. The keys of the dict must not be prefixed
279
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
280
      will extend the environment with additional variables. If no environment
281
      should be defined, an empty dictionary should be returned (not C{None}).
282
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
283
      will not be called.
284

285
    """
286
    raise NotImplementedError
287

    
288
  def BuildHooksNodes(self):
289
    """Build list of nodes to run LU's hooks.
290

291
    @rtype: tuple; (list, list)
292
    @return: Tuple containing a list of node names on which the hook
293
      should run before the execution and a list of node names on which the
294
      hook should run after the execution. No nodes should be returned as an
295
      empty list (and not None).
296
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
297
      will not be called.
298

299
    """
300
    raise NotImplementedError
301

    
302
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
303
    """Notify the LU about the results of its hooks.
304

305
    This method is called every time a hooks phase is executed, and notifies
306
    the Logical Unit about the hooks' result. The LU can then use it to alter
307
    its result based on the hooks.  By default the method does nothing and the
308
    previous result is passed back unchanged but any LU can define it if it
309
    wants to use the local cluster hook-scripts somehow.
310

311
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
312
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
313
    @param hook_results: the results of the multi-node hooks rpc call
314
    @param feedback_fn: function used to send feedback back to the caller
315
    @param lu_result: the previous Exec result this LU had, or None
316
        in the PRE phase
317
    @return: the new Exec result, based on the previous result
318
        and hook results
319

320
    """
321
    # API must be kept, thus we ignore the unused argument and could
322
    # be a function warnings
323
    # pylint: disable-msg=W0613,R0201
324
    return lu_result
325

    
326
  def _ExpandAndLockInstance(self):
327
    """Helper function to expand and lock an instance.
328

329
    Many LUs that work on an instance take its name in self.op.instance_name
330
    and need to expand it and then declare the expanded name for locking. This
331
    function does it, and then updates self.op.instance_name to the expanded
332
    name. It also initializes needed_locks as a dict, if this hasn't been done
333
    before.
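
    A typical caller looks like this (an illustrative sketch, not a complete
    LU)::

      def ExpandNames(self):
        self._ExpandAndLockInstance()
        # node locks will be recalculated later via _LockInstancesNodes
        self.needed_locks[locking.LEVEL_NODE] = []
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE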
334

335
    """
336
    if self.needed_locks is None:
337
      self.needed_locks = {}
338
    else:
339
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
340
        "_ExpandAndLockInstance called with instance-level locks set"
341
    self.op.instance_name = _ExpandInstanceName(self.cfg,
342
                                                self.op.instance_name)
343
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
344

    
345
  def _LockInstancesNodes(self, primary_only=False):
346
    """Helper function to declare instances' nodes for locking.
347

348
    This function should be called after locking one or more instances to lock
349
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
350
    with all primary or secondary nodes for instances already locked and
351
    present in self.needed_locks[locking.LEVEL_INSTANCE].
352

353
    It should be called from DeclareLocks, and for safety only works if
354
    self.recalculate_locks[locking.LEVEL_NODE] is set.
355

356
    In the future it may grow parameters to just lock some instance's nodes, or
357
    to just lock primaries or secondary nodes, if needed.
358

359
    It should be called in DeclareLocks in a way similar to::
360

361
      if level == locking.LEVEL_NODE:
362
        self._LockInstancesNodes()
363

364
    @type primary_only: boolean
365
    @param primary_only: only lock primary nodes of locked instances
366

367
    """
368
    assert locking.LEVEL_NODE in self.recalculate_locks, \
369
      "_LockInstancesNodes helper function called with no nodes to recalculate"
370

    
371
    # TODO: check if we've really been called with the instance locks held
372

    
373
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
374
    # future we might want to have different behaviors depending on the value
375
    # of self.recalculate_locks[locking.LEVEL_NODE]
376
    wanted_nodes = []
377
    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
378
      instance = self.context.cfg.GetInstanceInfo(instance_name)
379
      wanted_nodes.append(instance.primary_node)
380
      if not primary_only:
381
        wanted_nodes.extend(instance.secondary_nodes)
382

    
383
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
384
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
385
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
386
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
387

    
388
    del self.recalculate_locks[locking.LEVEL_NODE]
389

    
390

    
391
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
392
  """Simple LU which runs no hooks.
393

394
  This LU is intended as a parent for other LogicalUnits which will
395
  run no hooks, in order to reduce duplicate code.
396

397
  """
398
  HPATH = None
399
  HTYPE = None
400

    
401
  def BuildHooksEnv(self):
402
    """Empty BuildHooksEnv for NoHooksLu.
403

404
    This just raises an error.
405

406
    """
407
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
408

    
409
  def BuildHooksNodes(self):
410
    """Empty BuildHooksNodes for NoHooksLU.
411

412
    """
413
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
414

    
415

    
416
class Tasklet:
417
  """Tasklet base class.
418

419
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
420
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
421
  tasklets know nothing about locks.
422

423
  Subclasses must follow these rules:
424
    - Implement CheckPrereq
425
    - Implement Exec
426

427
  """
428
  def __init__(self, lu):
429
    self.lu = lu
430

    
431
    # Shortcuts
432
    self.cfg = lu.cfg
433
    self.rpc = lu.rpc
434

    
435
  def CheckPrereq(self):
436
    """Check prerequisites for this tasklets.
437

438
    This method should check whether the prerequisites for the execution of
439
    this tasklet are fulfilled. It can do internode communication, but it
440
    should be idempotent - no cluster or system changes are allowed.
441

442
    The method should raise errors.OpPrereqError in case something is not
443
    fulfilled. Its return value is ignored.
444

445
    This method should also update all parameters to their canonical form if it
446
    hasn't been done before.
447

448
    """
449
    pass
450

    
451
  def Exec(self, feedback_fn):
452
    """Execute the tasklet.
453

454
    This method should implement the actual work. It should raise
455
    errors.OpExecError for failures that are somewhat dealt with in code, or
456
    expected.
457

458
    """
459
    raise NotImplementedError
460

    
461

    
462
class _QueryBase:
463
  """Base for query utility classes.
464

465
  """
466
  #: Attribute holding field definitions
467
  FIELDS = None
468

    
469
  def __init__(self, filter_, fields, use_locking):
470
    """Initializes this class.
471

472
    """
473
    self.use_locking = use_locking
474

    
475
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
476
                             namefield="name")
477
    self.requested_data = self.query.RequestedData()
478
    self.names = self.query.RequestedNames()
479

    
480
    # Sort only if no names were requested
481
    self.sort_by_name = not self.names
482

    
483
    self.do_locking = None
484
    self.wanted = None
485

    
486
  def _GetNames(self, lu, all_names, lock_level):
487
    """Helper function to determine names asked for in the query.
488

489
    """
490
    if self.do_locking:
491
      names = lu.glm.list_owned(lock_level)
492
    else:
493
      names = all_names
494

    
495
    if self.wanted == locking.ALL_SET:
496
      assert not self.names
497
      # caller didn't specify names, so ordering is not important
498
      return utils.NiceSort(names)
499

    
500
    # caller specified names and we must keep the same order
501
    assert self.names
502
    assert not self.do_locking or lu.glm.is_owned(lock_level)
503

    
504
    missing = set(self.wanted).difference(names)
505
    if missing:
506
      raise errors.OpExecError("Some items were removed before retrieving"
507
                               " their data: %s" % missing)
508

    
509
    # Return expanded names
510
    return self.wanted
511

    
512
  def ExpandNames(self, lu):
513
    """Expand names for this query.
514

515
    See L{LogicalUnit.ExpandNames}.
516

517
    """
518
    raise NotImplementedError()
519

    
520
  def DeclareLocks(self, lu, level):
521
    """Declare locks for this query.
522

523
    See L{LogicalUnit.DeclareLocks}.
524

525
    """
526
    raise NotImplementedError()
527

    
528
  def _GetQueryData(self, lu):
529
    """Collects all data for this query.
530

531
    @return: Query data object
532

533
    """
534
    raise NotImplementedError()
535

    
536
  def NewStyleQuery(self, lu):
537
    """Collect data and execute query.
538

539
    """
540
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
541
                                  sort_by_name=self.sort_by_name)
542

    
543
  def OldStyleQuery(self, lu):
544
    """Collect data and execute query.
545

546
    """
547
    return self.query.OldStyleQuery(self._GetQueryData(lu),
548
                                    sort_by_name=self.sort_by_name)
549

    
550

    
551
def _ShareAll():
552
  """Returns a dict declaring all lock levels shared.
553

554
  """
555
  return dict.fromkeys(locking.LEVELS, 1)
556

    
557

    
558
def _SupportsOob(cfg, node):
559
  """Tells if node supports OOB.
560

561
  @type cfg: L{config.ConfigWriter}
562
  @param cfg: The cluster configuration
563
  @type node: L{objects.Node}
564
  @param node: The node
565
  @return: The OOB script if supported or an empty string otherwise
566

567
  """
568
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
569

    
570

    
571
def _GetWantedNodes(lu, nodes):
572
  """Returns list of checked and expanded node names.
573

574
  @type lu: L{LogicalUnit}
575
  @param lu: the logical unit on whose behalf we execute
576
  @type nodes: list
577
  @param nodes: list of node names or None for all nodes
578
  @rtype: list
579
  @return: the list of nodes, sorted
580
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
581

582
  """
583
  if nodes:
584
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
585

    
586
  return utils.NiceSort(lu.cfg.GetNodeList())
587

    
588

    
589
def _GetWantedInstances(lu, instances):
590
  """Returns list of checked and expanded instance names.
591

592
  @type lu: L{LogicalUnit}
593
  @param lu: the logical unit on whose behalf we execute
594
  @type instances: list
595
  @param instances: list of instance names or None for all instances
596
  @rtype: list
597
  @return: the list of instances, sorted
598
  @raise errors.OpPrereqError: if the instances parameter is wrong type
599
  @raise errors.OpPrereqError: if any of the passed instances is not found
600

601
  """
602
  if instances:
603
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
604
  else:
605
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
606
  return wanted
607

    
608

    
609
def _GetUpdatedParams(old_params, update_dict,
610
                      use_default=True, use_none=False):
611
  """Return the new version of a parameter dictionary.
612

613
  @type old_params: dict
614
  @param old_params: old parameters
615
  @type update_dict: dict
616
  @param update_dict: dict containing new parameter values, or
617
      constants.VALUE_DEFAULT to reset the parameter to its default
618
      value
619
  @type use_default: boolean
620
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
621
      values as 'to be deleted' values
622
  @type use_none: boolean
623
  @param use_none: whether to recognise C{None} values as 'to be
624
      deleted' values
625
  @rtype: dict
626
  @return: the new parameter dictionary
627

628
  """
629
  params_copy = copy.deepcopy(old_params)
630
  for key, val in update_dict.iteritems():
631
    if ((use_default and val == constants.VALUE_DEFAULT) or
632
        (use_none and val is None)):
633
      try:
634
        del params_copy[key]
635
      except KeyError:
636
        pass
637
    else:
638
      params_copy[key] = val
639
  return params_copy
640

    
641

    
642
def _ReleaseLocks(lu, level, names=None, keep=None):
643
  """Releases locks owned by an LU.
644

645
  @type lu: L{LogicalUnit}
646
  @param level: Lock level
647
  @type names: list or None
648
  @param names: Names of locks to release
649
  @type keep: list or None
650
  @param keep: Names of locks to retain
651

652
  """
653
  assert not (keep is not None and names is not None), \
654
         "Only one of the 'names' and the 'keep' parameters can be given"
655

    
656
  if names is not None:
657
    should_release = names.__contains__
658
  elif keep:
659
    should_release = lambda name: name not in keep
660
  else:
661
    should_release = None
662

    
663
  if should_release:
664
    retain = []
665
    release = []
666

    
667
    # Determine which locks to release
668
    for name in lu.glm.list_owned(level):
669
      if should_release(name):
670
        release.append(name)
671
      else:
672
        retain.append(name)
673

    
674
    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
675

    
676
    # Release just some locks
677
    lu.glm.release(level, names=release)
678

    
679
    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
680
  else:
681
    # Release everything
682
    lu.glm.release(level)
683

    
684
    assert not lu.glm.is_owned(level), "No locks should be owned"
685

    
686

    
687
def _MapInstanceDisksToNodes(instances):
688
  """Creates a map from (node, volume) to instance name.
689

690
  @type instances: list of L{objects.Instance}
691
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
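
  Illustrative result (node, volume and instance names are made up)::

    {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
     ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}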
692

693
  """
694
  return dict(((node, vol), inst.name)
695
              for inst in instances
696
              for (node, vols) in inst.MapLVsByNode().items()
697
              for vol in vols)
698

    
699

    
700
def _RunPostHook(lu, node_name):
701
  """Runs the post-hook for an opcode on a single node.
702

703
  """
704
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
705
  try:
706
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
707
  except:
708
    # pylint: disable-msg=W0702
709
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
710

    
711

    
712
def _CheckOutputFields(static, dynamic, selected):
713
  """Checks whether all selected fields are valid.
714

715
  @type static: L{utils.FieldSet}
716
  @param static: static fields set
717
  @type dynamic: L{utils.FieldSet}
718
  @param dynamic: dynamic fields set
719

720
  """
721
  f = utils.FieldSet()
722
  f.Extend(static)
723
  f.Extend(dynamic)
724

    
725
  delta = f.NonMatching(selected)
726
  if delta:
727
    raise errors.OpPrereqError("Unknown output fields selected: %s"
728
                               % ",".join(delta), errors.ECODE_INVAL)
729

    
730

    
731
def _CheckGlobalHvParams(params):
732
  """Validates that given hypervisor params are not global ones.
733

734
  This will ensure that instances don't get customised versions of
735
  global params.
736

737
  """
738
  used_globals = constants.HVC_GLOBALS.intersection(params)
739
  if used_globals:
740
    msg = ("The following hypervisor parameters are global and cannot"
741
           " be customized at instance level, please modify them at"
742
           " cluster level: %s" % utils.CommaJoin(used_globals))
743
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
744

    
745

    
746
def _CheckNodeOnline(lu, node, msg=None):
747
  """Ensure that a given node is online.
748

749
  @param lu: the LU on behalf of which we make the check
750
  @param node: the node to check
751
  @param msg: if passed, should be a message to replace the default one
752
  @raise errors.OpPrereqError: if the node is offline
753

754
  """
755
  if msg is None:
756
    msg = "Can't use offline node"
757
  if lu.cfg.GetNodeInfo(node).offline:
758
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
759

    
760

    
761
def _CheckNodeNotDrained(lu, node):
762
  """Ensure that a given node is not drained.
763

764
  @param lu: the LU on behalf of which we make the check
765
  @param node: the node to check
766
  @raise errors.OpPrereqError: if the node is drained
767

768
  """
769
  if lu.cfg.GetNodeInfo(node).drained:
770
    raise errors.OpPrereqError("Can't use drained node %s" % node,
771
                               errors.ECODE_STATE)
772

    
773

    
774
def _CheckNodeVmCapable(lu, node):
775
  """Ensure that a given node is vm capable.
776

777
  @param lu: the LU on behalf of which we make the check
778
  @param node: the node to check
779
  @raise errors.OpPrereqError: if the node is not vm capable
780

781
  """
782
  if not lu.cfg.GetNodeInfo(node).vm_capable:
783
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
784
                               errors.ECODE_STATE)
785

    
786

    
787
def _CheckNodeHasOS(lu, node, os_name, force_variant):
788
  """Ensure that a node supports a given OS.
789

790
  @param lu: the LU on behalf of which we make the check
791
  @param node: the node to check
792
  @param os_name: the OS to query about
793
  @param force_variant: whether to ignore variant errors
794
  @raise errors.OpPrereqError: if the node is not supporting the OS
795

796
  """
797
  result = lu.rpc.call_os_get(node, os_name)
798
  result.Raise("OS '%s' not in supported OS list for node %s" %
799
               (os_name, node),
800
               prereq=True, ecode=errors.ECODE_INVAL)
801
  if not force_variant:
802
    _CheckOSVariant(result.payload, os_name)
803

    
804

    
805
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
806
  """Ensure that a node has the given secondary ip.
807

808
  @type lu: L{LogicalUnit}
809
  @param lu: the LU on behalf of which we make the check
810
  @type node: string
811
  @param node: the node to check
812
  @type secondary_ip: string
813
  @param secondary_ip: the ip to check
814
  @type prereq: boolean
815
  @param prereq: whether to throw a prerequisite or an execute error
816
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
817
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
818

819
  """
820
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
821
  result.Raise("Failure checking secondary ip on node %s" % node,
822
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
823
  if not result.payload:
824
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
825
           " please fix and re-run this command" % secondary_ip)
826
    if prereq:
827
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
828
    else:
829
      raise errors.OpExecError(msg)
830

    
831

    
832
def _GetClusterDomainSecret():
833
  """Reads the cluster domain secret.
834

835
  """
836
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
837
                               strict=True)
838

    
839

    
840
def _CheckInstanceDown(lu, instance, reason):
841
  """Ensure that an instance is not running."""
842
  if instance.admin_up:
843
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
844
                               (instance.name, reason), errors.ECODE_STATE)
845

    
846
  pnode = instance.primary_node
847
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
848
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
849
              prereq=True, ecode=errors.ECODE_ENVIRON)
850

    
851
  if instance.name in ins_l.payload:
852
    raise errors.OpPrereqError("Instance %s is running, %s" %
853
                               (instance.name, reason), errors.ECODE_STATE)
854

    
855

    
856
def _ExpandItemName(fn, name, kind):
857
  """Expand an item name.
858

859
  @param fn: the function to use for expansion
860
  @param name: requested item name
861
  @param kind: text description ('Node' or 'Instance')
862
  @return: the resolved (full) name
863
  @raise errors.OpPrereqError: if the item is not found
864

865
  """
866
  full_name = fn(name)
867
  if full_name is None:
868
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
869
                               errors.ECODE_NOENT)
870
  return full_name
871

    
872

    
873
def _ExpandNodeName(cfg, name):
874
  """Wrapper over L{_ExpandItemName} for nodes."""
875
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
876

    
877

    
878
def _ExpandInstanceName(cfg, name):
879
  """Wrapper over L{_ExpandItemName} for instance."""
880
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
881

    
882

    
883
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
884
                          memory, vcpus, nics, disk_template, disks,
885
                          bep, hvp, hypervisor_name, tags):
886
  """Builds instance related env variables for hooks
887

888
  This builds the hook environment from individual variables.
889

890
  @type name: string
891
  @param name: the name of the instance
892
  @type primary_node: string
893
  @param primary_node: the name of the instance's primary node
894
  @type secondary_nodes: list
895
  @param secondary_nodes: list of secondary nodes as strings
896
  @type os_type: string
897
  @param os_type: the name of the instance's OS
898
  @type status: boolean
899
  @param status: the should_run status of the instance
900
  @type memory: string
901
  @param memory: the memory size of the instance
902
  @type vcpus: string
903
  @param vcpus: the count of VCPUs the instance has
904
  @type nics: list
905
  @param nics: list of tuples (ip, mac, mode, link) representing
906
      the NICs the instance has
907
  @type disk_template: string
908
  @param disk_template: the disk template of the instance
909
  @type disks: list
910
  @param disks: the list of (size, mode) pairs
911
  @type bep: dict
912
  @param bep: the backend parameters for the instance
913
  @type hvp: dict
914
  @param hvp: the hypervisor parameters for the instance
915
  @type hypervisor_name: string
916
  @param hypervisor_name: the hypervisor for the instance
917
  @type tags: list
918
  @param tags: list of instance tags as strings
919
  @rtype: dict
920
  @return: the hook environment for this instance
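
  Illustrative excerpt of the result (values are made up)::

    {
      "OP_TARGET": "inst1.example.com",
      "INSTANCE_NAME": "inst1.example.com",
      "INSTANCE_PRIMARY": "node1.example.com",
      "INSTANCE_STATUS": "up",
      "INSTANCE_NIC_COUNT": 1,
      "INSTANCE_DISK_COUNT": 1,
    }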
921

922
  """
923
  if status:
924
    str_status = "up"
925
  else:
926
    str_status = "down"
927
  env = {
928
    "OP_TARGET": name,
929
    "INSTANCE_NAME": name,
930
    "INSTANCE_PRIMARY": primary_node,
931
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
932
    "INSTANCE_OS_TYPE": os_type,
933
    "INSTANCE_STATUS": str_status,
934
    "INSTANCE_MEMORY": memory,
935
    "INSTANCE_VCPUS": vcpus,
936
    "INSTANCE_DISK_TEMPLATE": disk_template,
937
    "INSTANCE_HYPERVISOR": hypervisor_name,
938
  }
939

    
940
  if nics:
941
    nic_count = len(nics)
942
    for idx, (ip, mac, mode, link) in enumerate(nics):
943
      if ip is None:
944
        ip = ""
945
      env["INSTANCE_NIC%d_IP" % idx] = ip
946
      env["INSTANCE_NIC%d_MAC" % idx] = mac
947
      env["INSTANCE_NIC%d_MODE" % idx] = mode
948
      env["INSTANCE_NIC%d_LINK" % idx] = link
949
      if mode == constants.NIC_MODE_BRIDGED:
950
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
951
  else:
952
    nic_count = 0
953

    
954
  env["INSTANCE_NIC_COUNT"] = nic_count
955

    
956
  if disks:
957
    disk_count = len(disks)
958
    for idx, (size, mode) in enumerate(disks):
959
      env["INSTANCE_DISK%d_SIZE" % idx] = size
960
      env["INSTANCE_DISK%d_MODE" % idx] = mode
961
  else:
962
    disk_count = 0
963

    
964
  env["INSTANCE_DISK_COUNT"] = disk_count
965

    
966
  if not tags:
967
    tags = []
968

    
969
  env["INSTANCE_TAGS"] = " ".join(tags)
970

    
971
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
972
    for key, value in source.items():
973
      env["INSTANCE_%s_%s" % (kind, key)] = value
974

    
975
  return env
976

    
977

    
978
def _NICListToTuple(lu, nics):
979
  """Build a list of nic information tuples.
980

981
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
982
  value in LUInstanceQueryData.
983

984
  @type lu:  L{LogicalUnit}
985
  @param lu: the logical unit on whose behalf we execute
986
  @type nics: list of L{objects.NIC}
987
  @param nics: list of nics to convert to hooks tuples
988

989
  """
990
  hooks_nics = []
991
  cluster = lu.cfg.GetClusterInfo()
992
  for nic in nics:
993
    ip = nic.ip
994
    mac = nic.mac
995
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
996
    mode = filled_params[constants.NIC_MODE]
997
    link = filled_params[constants.NIC_LINK]
998
    hooks_nics.append((ip, mac, mode, link))
999
  return hooks_nics
1000

    
1001

    
1002
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1003
  """Builds instance related env variables for hooks from an object.
1004

1005
  @type lu: L{LogicalUnit}
1006
  @param lu: the logical unit on whose behalf we execute
1007
  @type instance: L{objects.Instance}
1008
  @param instance: the instance for which we should build the
1009
      environment
1010
  @type override: dict
1011
  @param override: dictionary with key/values that will override
1012
      our values
1013
  @rtype: dict
1014
  @return: the hook environment dictionary
1015

1016
  """
1017
  cluster = lu.cfg.GetClusterInfo()
1018
  bep = cluster.FillBE(instance)
1019
  hvp = cluster.FillHV(instance)
1020
  args = {
1021
    "name": instance.name,
1022
    "primary_node": instance.primary_node,
1023
    "secondary_nodes": instance.secondary_nodes,
1024
    "os_type": instance.os,
1025
    "status": instance.admin_up,
1026
    "memory": bep[constants.BE_MEMORY],
1027
    "vcpus": bep[constants.BE_VCPUS],
1028
    "nics": _NICListToTuple(lu, instance.nics),
1029
    "disk_template": instance.disk_template,
1030
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1031
    "bep": bep,
1032
    "hvp": hvp,
1033
    "hypervisor_name": instance.hypervisor,
1034
    "tags": instance.tags,
1035
  }
1036
  if override:
1037
    args.update(override)
1038
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1039

    
1040

    
1041
def _AdjustCandidatePool(lu, exceptions):
1042
  """Adjust the candidate pool after node operations.
1043

1044
  """
1045
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1046
  if mod_list:
1047
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1048
               utils.CommaJoin(node.name for node in mod_list))
1049
    for name in mod_list:
1050
      lu.context.ReaddNode(name)
1051
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1052
  if mc_now > mc_max:
1053
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1054
               (mc_now, mc_max))
1055

    
1056

    
1057
def _DecideSelfPromotion(lu, exceptions=None):
1058
  """Decide whether I should promote myself as a master candidate.
1059

1060
  """
1061
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1062
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1063
  # the new node will increase mc_max by one, so:
1064
  mc_should = min(mc_should + 1, cp_size)
1065
  return mc_now < mc_should
1066

    
1067

    
1068
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1069
  """Check that the brigdes needed by a list of nics exist.
1070

1071
  """
1072
  cluster = lu.cfg.GetClusterInfo()
1073
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1074
  brlist = [params[constants.NIC_LINK] for params in paramslist
1075
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1076
  if brlist:
1077
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1078
    result.Raise("Error checking bridges on destination node '%s'" %
1079
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1080

    
1081

    
1082
def _CheckInstanceBridgesExist(lu, instance, node=None):
1083
  """Check that the brigdes needed by an instance exist.
1084

1085
  """
1086
  if node is None:
1087
    node = instance.primary_node
1088
  _CheckNicsBridgesExist(lu, instance.nics, node)
1089

    
1090

    
1091
def _CheckOSVariant(os_obj, name):
1092
  """Check whether an OS name conforms to the os variants specification.
1093

1094
  @type os_obj: L{objects.OS}
1095
  @param os_obj: OS object to check
1096
  @type name: string
1097
  @param name: OS name passed by the user, to check for validity
1098

1099
  """
1100
  variant = objects.OS.GetVariant(name)
1101
  if not os_obj.supported_variants:
1102
    if variant:
1103
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1104
                                 " passed)" % (os_obj.name, variant),
1105
                                 errors.ECODE_INVAL)
1106
    return
1107
  if not variant:
1108
    raise errors.OpPrereqError("OS name must include a variant",
1109
                               errors.ECODE_INVAL)
1110

    
1111
  if variant not in os_obj.supported_variants:
1112
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1113

    
1114

    
1115
def _GetNodeInstancesInner(cfg, fn):
1116
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1117

    
1118

    
1119
def _GetNodeInstances(cfg, node_name):
1120
  """Returns a list of all primary and secondary instances on a node.
1121

1122
  """
1123

    
1124
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1125

    
1126

    
1127
def _GetNodePrimaryInstances(cfg, node_name):
1128
  """Returns primary instances on a node.
1129

1130
  """
1131
  return _GetNodeInstancesInner(cfg,
1132
                                lambda inst: node_name == inst.primary_node)
1133

    
1134

    
1135
def _GetNodeSecondaryInstances(cfg, node_name):
1136
  """Returns secondary instances on a node.
1137

1138
  """
1139
  return _GetNodeInstancesInner(cfg,
1140
                                lambda inst: node_name in inst.secondary_nodes)
1141

    
1142

    
1143
def _GetStorageTypeArgs(cfg, storage_type):
1144
  """Returns the arguments for a storage type.
1145

1146
  """
1147
  # Special case for file storage
1148
  if storage_type == constants.ST_FILE:
1149
    # storage.FileStorage wants a list of storage directories
1150
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1151

    
1152
  return []
1153

    
1154

    
1155
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1156
  faulty = []
1157

    
1158
  for dev in instance.disks:
1159
    cfg.SetDiskID(dev, node_name)
1160

    
1161
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1162
  result.Raise("Failed to get disk status from node %s" % node_name,
1163
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1164

    
1165
  for idx, bdev_status in enumerate(result.payload):
1166
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1167
      faulty.append(idx)
1168

    
1169
  return faulty
1170

    
1171

    
1172
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1173
  """Check the sanity of iallocator and node arguments and use the
1174
  cluster-wide iallocator if appropriate.
1175

1176
  Check that at most one of (iallocator, node) is specified. If none is
1177
  specified, then the LU's opcode's iallocator slot is filled with the
1178
  cluster-wide default iallocator.
1179

1180
  @type iallocator_slot: string
1181
  @param iallocator_slot: the name of the opcode iallocator slot
1182
  @type node_slot: string
1183
  @param node_slot: the name of the opcode target node slot
1184

1185
  """
1186
  node = getattr(lu.op, node_slot, None)
1187
  iallocator = getattr(lu.op, iallocator_slot, None)
1188

    
1189
  if node is not None and iallocator is not None:
1190
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1191
                               errors.ECODE_INVAL)
1192
  elif node is None and iallocator is None:
1193
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1194
    if default_iallocator:
1195
      setattr(lu.op, iallocator_slot, default_iallocator)
1196
    else:
1197
      raise errors.OpPrereqError("No iallocator or node given and no"
1198
                                 " cluster-wide default iallocator found;"
1199
                                 " please specify either an iallocator or a"
1200
                                 " node, or set a cluster-wide default"
1201
                                 " iallocator")
1202

    
1203

    
1204
class LUClusterPostInit(LogicalUnit):
1205
  """Logical unit for running hooks after cluster initialization.
1206

1207
  """
1208
  HPATH = "cluster-init"
1209
  HTYPE = constants.HTYPE_CLUSTER
1210

    
1211
  def BuildHooksEnv(self):
1212
    """Build hooks env.
1213

1214
    """
1215
    return {
1216
      "OP_TARGET": self.cfg.GetClusterName(),
1217
      }
1218

    
1219
  def BuildHooksNodes(self):
1220
    """Build hooks nodes.
1221

1222
    """
1223
    return ([], [self.cfg.GetMasterNode()])
1224

    
1225
  def Exec(self, feedback_fn):
1226
    """Nothing to do.
1227

1228
    """
1229
    return True
1230

    
1231

    
1232
class LUClusterDestroy(LogicalUnit):
1233
  """Logical unit for destroying the cluster.
1234

1235
  """
1236
  HPATH = "cluster-destroy"
1237
  HTYPE = constants.HTYPE_CLUSTER
1238

    
1239
  def BuildHooksEnv(self):
1240
    """Build hooks env.
1241

1242
    """
1243
    return {
1244
      "OP_TARGET": self.cfg.GetClusterName(),
1245
      }
1246

    
1247
  def BuildHooksNodes(self):
1248
    """Build hooks nodes.
1249

1250
    """
1251
    return ([], [])
1252

    
1253
  def CheckPrereq(self):
1254
    """Check prerequisites.
1255

1256
    This checks whether the cluster is empty.
1257

1258
    Any errors are signaled by raising errors.OpPrereqError.
1259

1260
    """
1261
    master = self.cfg.GetMasterNode()
1262

    
1263
    nodelist = self.cfg.GetNodeList()
1264
    if len(nodelist) != 1 or nodelist[0] != master:
1265
      raise errors.OpPrereqError("There are still %d node(s) in"
1266
                                 " this cluster." % (len(nodelist) - 1),
1267
                                 errors.ECODE_INVAL)
1268
    instancelist = self.cfg.GetInstanceList()
1269
    if instancelist:
1270
      raise errors.OpPrereqError("There are still %d instance(s) in"
1271
                                 " this cluster." % len(instancelist),
1272
                                 errors.ECODE_INVAL)
1273

    
1274
  def Exec(self, feedback_fn):
1275
    """Destroys the cluster.
1276

1277
    """
1278
    master = self.cfg.GetMasterNode()
1279

    
1280
    # Run post hooks on master node before it's removed
1281
    _RunPostHook(self, master)
1282

    
1283
    result = self.rpc.call_node_stop_master(master, False)
1284
    result.Raise("Could not disable the master role")
1285

    
1286
    return master
1287

    
1288

    
1289
def _VerifyCertificate(filename):
1290
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1291

1292
  @type filename: string
1293
  @param filename: Path to PEM file
1294

1295
  """
1296
  try:
1297
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1298
                                           utils.ReadFile(filename))
1299
  except Exception, err: # pylint: disable-msg=W0703
1300
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1301
            "Failed to load X509 certificate %s: %s" % (filename, err))
1302

    
1303
  (errcode, msg) = \
1304
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1305
                                constants.SSL_CERT_EXPIRATION_ERROR)
1306

    
1307
  if msg:
1308
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1309
  else:
1310
    fnamemsg = None
1311

    
1312
  if errcode is None:
1313
    return (None, fnamemsg)
1314
  elif errcode == utils.CERT_WARNING:
1315
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1316
  elif errcode == utils.CERT_ERROR:
1317
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1318

    
1319
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1320

    
1321

    
1322
def _GetAllHypervisorParameters(cluster, instances):
1323
  """Compute the set of all hypervisor parameters.
1324

1325
  @type cluster: L{objects.Cluster}
1326
  @param cluster: the cluster object
1327
  @type instances: list of L{objects.Instance}
1328
  @param instances: additional instances from which to obtain parameters
1329
  @rtype: list of (origin, hypervisor, parameters)
1330
  @return: a list with all parameters found, indicating the hypervisor they
1331
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
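
  Illustrative entry of the returned list (values are made up)::

    ("os debootstrap", "xen-pvm", {"kernel_path": "/boot/vmlinuz-xenU"})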
1332

1333
  """
1334
  hvp_data = []
1335

    
1336
  for hv_name in cluster.enabled_hypervisors:
1337
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1338

    
1339
  for os_name, os_hvp in cluster.os_hvp.items():
1340
    for hv_name, hv_params in os_hvp.items():
1341
      if hv_params:
1342
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1343
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1344

    
1345
  # TODO: collapse identical parameter values in a single one
1346
  for instance in instances:
1347
    if instance.hvparams:
1348
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1349
                       cluster.FillHV(instance)))
1350

    
1351
  return hvp_data
1352

    
1353

    
1354
class _VerifyErrors(object):
1355
  """Mix-in for cluster/group verify LUs.
1356

1357
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1358
  self.op and self._feedback_fn to be available.)
1359

1360
  """
1361
  TCLUSTER = "cluster"
1362
  TNODE = "node"
1363
  TINSTANCE = "instance"
1364

    
1365
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1366
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1367
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1368
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1369
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1370
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1371
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1372
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1373
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1374
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1375
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1376
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1377
  ENODEDRBD = (TNODE, "ENODEDRBD")
1378
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1379
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1380
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1381
  ENODEHV = (TNODE, "ENODEHV")
1382
  ENODELVM = (TNODE, "ENODELVM")
1383
  ENODEN1 = (TNODE, "ENODEN1")
1384
  ENODENET = (TNODE, "ENODENET")
1385
  ENODEOS = (TNODE, "ENODEOS")
1386
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1387
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1388
  ENODERPC = (TNODE, "ENODERPC")
1389
  ENODESSH = (TNODE, "ENODESSH")
1390
  ENODEVERSION = (TNODE, "ENODEVERSION")
1391
  ENODESETUP = (TNODE, "ENODESETUP")
1392
  ENODETIME = (TNODE, "ENODETIME")
1393
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1394

    
1395
  ETYPE_FIELD = "code"
1396
  ETYPE_ERROR = "ERROR"
1397
  ETYPE_WARNING = "WARNING"
1398

    
1399
  def _Error(self, ecode, item, msg, *args, **kwargs):
1400
    """Format an error message.
1401

1402
    Based on the opcode's error_codes parameter, either format a
1403
    parseable error code, or a simpler error string.
1404

1405
    This must be called only from Exec and functions called from Exec.
1406

1407
    """
1408
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1409
    itype, etxt = ecode
1410
    # first complete the msg
1411
    if args:
1412
      msg = msg % args
1413
    # then format the whole message
1414
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1415
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1416
    else:
1417
      if item:
1418
        item = " " + item
1419
      else:
1420
        item = ""
1421
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1422
    # and finally report it via the feedback_fn
1423
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1424

    
1425
  def _ErrorIf(self, cond, *args, **kwargs):
1426
    """Log an error message if the passed condition is True.
1427

1428
    """
1429
    cond = (bool(cond)
1430
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1431
    if cond:
1432
      self._Error(*args, **kwargs)
1433
    # do not mark the operation as failed for WARN cases only
1434
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1435
      self.bad = self.bad or cond
1436

    
1437

    
1438
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1439
  """Verifies the cluster config.
1440

1441
  """
1442
  REQ_BGL = True
1443

    
1444
  def _VerifyHVP(self, hvp_data):
1445
    """Verifies locally the syntax of the hypervisor parameters.
1446

1447
    """
1448
    for item, hv_name, hv_params in hvp_data:
1449
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1450
             (hv_name, item))
1451
      try:
1452
        hv_class = hypervisor.GetHypervisor(hv_name)
1453
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1454
        hv_class.CheckParameterSyntax(hv_params)
1455
      except errors.GenericError, err:
1456
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1457

    
1458
  def ExpandNames(self):
1459
    # Information can be safely retrieved as the BGL is acquired in exclusive
1460
    # mode
1461
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1462
    self.all_node_info = self.cfg.GetAllNodesInfo()
1463
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1464
    self.needed_locks = {}
1465

    
1466
  def Exec(self, feedback_fn):
1467
    """Verify integrity of cluster, performing various test on nodes.
1468

1469
    """
1470
    self.bad = False
1471
    self._feedback_fn = feedback_fn
1472

    
1473
    feedback_fn("* Verifying cluster config")
1474

    
1475
    for msg in self.cfg.VerifyConfig():
1476
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1477

    
1478
    feedback_fn("* Verifying cluster certificate files")
1479

    
1480
    for cert_filename in constants.ALL_CERT_FILES:
1481
      (errcode, msg) = _VerifyCertificate(cert_filename)
1482
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1483

    
1484
    feedback_fn("* Verifying hypervisor parameters")
1485

    
1486
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1487
                                                self.all_inst_info.values()))
1488

    
1489
    feedback_fn("* Verifying all nodes belong to an existing group")
1490

    
1491
    # We do this verification here because, should this bogus circumstance
1492
    # occur, it would never be caught by VerifyGroup, which only acts on
1493
    # nodes/instances reachable from existing node groups.
1494

    
1495
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1496
                         if node.group not in self.all_group_info)
1497

    
1498
    dangling_instances = {}
1499
    no_node_instances = []
1500

    
1501
    for inst in self.all_inst_info.values():
1502
      if inst.primary_node in dangling_nodes:
1503
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1504
      elif inst.primary_node not in self.all_node_info:
1505
        no_node_instances.append(inst.name)
1506

    
1507
    pretty_dangling = [
1508
        "%s (%s)" %
1509
        (node.name,
1510
         utils.CommaJoin(dangling_instances.get(node.name,
1511
                                                ["no instances"])))
1512
        for node in dangling_nodes]
1513

    
1514
    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1515
                  "the following nodes (and their instances) belong to a non"
1516
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1517

    
1518
    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1519
                  "the following instances have a non-existing primary-node:"
1520
                  " %s", utils.CommaJoin(no_node_instances))
1521

    
1522
    return (not self.bad, [g.name for g in self.all_group_info.values()])
1523

    
1524

    
1525
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1526
  """Verifies the status of a node group.
1527

1528
  """
1529
  HPATH = "cluster-verify"
1530
  HTYPE = constants.HTYPE_CLUSTER
1531
  REQ_BGL = False
1532

    
1533
  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}
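      # Illustrative values only (hypothetical node/instance names): once
      # Exec() has merged config and RPC data, an image may end up roughly as
      #   img.name = "node1.example.com"; img.pinst = ["inst1"]
      #   img.sbp = {"node2.example.com": ["inst2"]}; img.mfree = 2048
      # i.e. config-derived lists plus runtime figures (memory in MiB).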

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()
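    # Illustrative example (hypothetical names/UUID): for a group with UUID
    # "0a1b..." containing instance "inst1", the locks declared above amount
    # to {LEVEL_INSTANCE: ["inst1"], LEVEL_NODEGROUP: ["0a1b..."],
    # LEVEL_NODE: []}, with the node level filled in by DeclareLocks() below
    # once the group membership can be read.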

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
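    # Worked example with hypothetical figures: assuming NODE_MAX_CLOCK_SKEW
    # is 150 seconds and the verify RPC ran between t=1000 and t=1010, a node
    # time of t=1100 is accepted (1100 <= 1010 + 150) while t=1200 is reported
    # as diverging by at least 190.0s (1200 - 1010).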

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]
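    # Illustrative flattening (hypothetical node name and payloads): a
    # diskstatus of {"node1": [(True, st0), (False, "timeout")]} becomes
    #   [("node1", True, st0, 0), ("node1", False, "timeout", 1)]
    # so every disk index gets its own (success, status) pair below.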

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)
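    # Worked example with hypothetical names and sizes: if node2 is secondary
    # for inst1 (BE_MEMORY=2048) and inst2 (BE_MEMORY=4096), both
    # auto-balanced and with primary node1, needed_mem for the node1 -> node2
    # direction is 6144 MiB and an ENODEN1 error is reported when node2's
    # mfree is below that.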
1980
  @classmethod
1981
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1982
                   (files_all, files_all_opt, files_mc, files_vm)):
1983
    """Verifies file checksums collected from all nodes.
1984

1985
    @param errorif: Callback for reporting errors
1986
    @param nodeinfo: List of L{objects.Node} objects
1987
    @param master_node: Name of master node
1988
    @param all_nvinfo: RPC results
1989

1990
    """
1991
    node_names = frozenset(node.name for node in nodeinfo if not node.offline)
1992

    
1993
    assert master_node in node_names
1994
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1995
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1996
           "Found file listed in more than one file list"
1997

    
1998
    # Define functions determining which nodes to consider for a file
1999
    file2nodefn = dict([(filename, fn)
2000
      for (files, fn) in [(files_all, None),
2001
                          (files_all_opt, None),
2002
                          (files_mc, lambda node: (node.master_candidate or
2003
                                                   node.name == master_node)),
2004
                          (files_vm, lambda node: node.vm_capable)]
2005
      for filename in files])
2006

    
2007
    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
2008

    
2009
    for node in nodeinfo:
2010
      if node.offline:
2011
        continue
2012

    
2013
      nresult = all_nvinfo[node.name]
2014

    
2015
      if nresult.fail_msg or not nresult.payload:
2016
        node_files = None
2017
      else:
2018
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2019

    
2020
      test = not (node_files and isinstance(node_files, dict))
2021
      errorif(test, cls.ENODEFILECHECK, node.name,
2022
              "Node did not return file checksum data")
2023
      if test:
2024
        continue
2025

    
2026
      for (filename, checksum) in node_files.items():
2027
        # Check if the file should be considered for a node
2028
        fn = file2nodefn[filename]
2029
        if fn is None or fn(node):
2030
          fileinfo[filename].setdefault(checksum, set()).add(node.name)
2031

    
2032
    for (filename, checksums) in fileinfo.items():
2033
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2034

    
2035
      # Nodes having the file
2036
      with_file = frozenset(node_name
2037
                            for nodes in fileinfo[filename].values()
2038
                            for node_name in nodes)
2039

    
2040
      # Nodes missing file
2041
      missing_file = node_names - with_file
2042

    
2043
      if filename in files_all_opt:
2044
        # All or no nodes
2045
        errorif(missing_file and missing_file != node_names,
2046
                cls.ECLUSTERFILECHECK, None,
2047
                "File %s is optional, but it must exist on all or no"
2048
                " nodes (not found on %s)",
2049
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2050
      else:
2051
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2052
                "File %s is missing from node(s) %s", filename,
2053
                utils.CommaJoin(utils.NiceSort(missing_file)))
2054

    
2055
      # See if there are multiple versions of the file
2056
      test = len(checksums) > 1
2057
      if test:
2058
        variants = ["variant %s on %s" %
2059
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2060
                    for (idx, (checksum, nodes)) in
2061
                      enumerate(sorted(checksums.items()))]
2062
      else:
2063
        variants = []
2064

    
2065
      errorif(test, cls.ECLUSTERFILECHECK, None,
2066
              "File %s found with %s different checksums (%s)",
2067
              filename, len(checksums), "; ".join(variants))
2068

    
2069
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2070
                      drbd_map):
2071
    """Verifies and the node DRBD status.
2072

2073
    @type ninfo: L{objects.Node}
2074
    @param ninfo: the node to check
2075
    @param nresult: the remote results for the node
2076
    @param instanceinfo: the dict of instances
2077
    @param drbd_helper: the configured DRBD usermode helper
2078
    @param drbd_map: the DRBD map as returned by
2079
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2080

2081
    """
2082
    node = ninfo.name
2083
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2084

    
2085
    if drbd_helper:
2086
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2087
      test = (helper_result == None)
2088
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
2089
               "no drbd usermode helper returned")
2090
      if helper_result:
2091
        status, payload = helper_result
2092
        test = not status
2093
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2094
                 "drbd usermode helper check unsuccessful: %s", payload)
2095
        test = status and (payload != drbd_helper)
2096
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2097
                 "wrong drbd usermode helper: %s", payload)
2098

    
2099
    # compute the DRBD minors
2100
    node_drbd = {}
2101
    for minor, instance in drbd_map[node].items():
2102
      test = instance not in instanceinfo
2103
      _ErrorIf(test, self.ECLUSTERCFG, None,
2104
               "ghost instance '%s' in temporary DRBD map", instance)
2105
        # ghost instance should not be running, but otherwise we
2106
        # don't give double warnings (both ghost instance and
2107
        # unallocated minor in use)
2108
      if test:
2109
        node_drbd[minor] = (instance, False)
2110
      else:
2111
        instance = instanceinfo[instance]
2112
        node_drbd[minor] = (instance.name, instance.admin_up)
2113

    
2114
    # and now check them
2115
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2116
    test = not isinstance(used_minors, (tuple, list))
2117
    _ErrorIf(test, self.ENODEDRBD, node,
2118
             "cannot parse drbd status file: %s", str(used_minors))
2119
    if test:
2120
      # we cannot check drbd status
2121
      return
2122

    
2123
    for minor, (iname, must_exist) in node_drbd.items():
2124
      test = minor not in used_minors and must_exist
2125
      _ErrorIf(test, self.ENODEDRBD, node,
2126
               "drbd minor %d of instance %s is not active", minor, iname)
2127
    for minor in used_minors:
2128
      test = minor not in node_drbd
2129
      _ErrorIf(test, self.ENODEDRBD, node,
2130
               "unallocated drbd minor %d is in use", minor)
2131

    
2132
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2133
    """Builds the node OS structures.
2134

2135
    @type ninfo: L{objects.Node}
2136
    @param ninfo: the node to check
2137
    @param nresult: the remote results for the node
2138
    @param nimg: the node image object
2139

2140
    """
2141
    node = ninfo.name
2142
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2143

    
2144
    remote_os = nresult.get(constants.NV_OSLIST, None)
2145
    test = (not isinstance(remote_os, list) or
2146
            not compat.all(isinstance(v, list) and len(v) == 7
2147
                           for v in remote_os))
2148

    
2149
    _ErrorIf(test, self.ENODEOS, node,
2150
             "node hasn't returned valid OS data")
2151

    
2152
    nimg.os_fail = test
2153

    
2154
    if test:
2155
      return
2156

    
2157
    os_dict = {}
2158

    
2159
    for (name, os_path, status, diagnose,
2160
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2161

    
2162
      if name not in os_dict:
2163
        os_dict[name] = []
2164

    
2165
      # parameters is a list of lists instead of list of tuples due to
2166
      # JSON lacking a real tuple type, fix it:
2167
      parameters = [tuple(v) for v in parameters]
2168
      os_dict[name].append((os_path, status, diagnose,
2169
                            set(variants), set(parameters), set(api_ver)))
2170

    
2171
    nimg.oslist = os_dict
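    # Illustrative shape (hypothetical OS and values): at this point os_dict
    # may look like
    #   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
    #                     set(["default"]), set(), set([20]))]}
    # i.e. one entry per OS name with a tuple per path the OS was found at.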
2173
  def _VerifyNodeOS(self, ninfo, nimg, base):
2174
    """Verifies the node OS list.
2175

2176
    @type ninfo: L{objects.Node}
2177
    @param ninfo: the node to check
2178
    @param nimg: the node image object
2179
    @param base: the 'template' node we match against (e.g. from the master)
2180

2181
    """
2182
    node = ninfo.name
2183
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2184

    
2185
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2186

    
2187
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2188
    for os_name, os_data in nimg.oslist.items():
2189
      assert os_data, "Empty OS status for OS %s?!" % os_name
2190
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2191
      _ErrorIf(not f_status, self.ENODEOS, node,
2192
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2193
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2194
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2195
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2196
      # comparisons with the 'base' image
2197
      test = os_name not in base.oslist
2198
      _ErrorIf(test, self.ENODEOS, node,
2199
               "Extra OS %s not present on reference node (%s)",
2200
               os_name, base.name)
2201
      if test:
2202
        continue
2203
      assert base.oslist[os_name], "Base node has empty OS status?"
2204
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2205
      if not b_status:
2206
        # base OS is invalid, skipping
2207
        continue
2208
      for kind, a, b in [("API version", f_api, b_api),
2209
                         ("variants list", f_var, b_var),
2210
                         ("parameters", beautify_params(f_param),
2211
                          beautify_params(b_param))]:
2212
        _ErrorIf(a != b, self.ENODEOS, node,
2213
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2214
                 kind, os_name, base.name,
2215
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2216

    
2217
    # check any missing OSes
2218
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2219
    _ErrorIf(missing, self.ENODEOS, node,
2220
             "OSes present on reference node %s but missing on this node: %s",
2221
             base.name, utils.CommaJoin(missing))
2222

    
2223
  def _VerifyOob(self, ninfo, nresult):
2224
    """Verifies out of band functionality of a node.
2225

2226
    @type ninfo: L{objects.Node}
2227
    @param ninfo: the node to check
2228
    @param nresult: the remote results for the node
2229

2230
    """
2231
    node = ninfo.name
2232
    # We just have to verify the paths on master and/or master candidates
2233
    # as the oob helper is invoked on the master
2234
    if ((ninfo.master_candidate or ninfo.master_capable) and
2235
        constants.NV_OOB_PATHS in nresult):
2236
      for path_result in nresult[constants.NV_OOB_PATHS]:
2237
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2238

    
2239
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2240
    """Verifies and updates the node volume data.
2241

2242
    This function will update a L{NodeImage}'s internal structures
2243
    with data from the remote call.
2244

2245
    @type ninfo: L{objects.Node}
2246
    @param ninfo: the node to check
2247
    @param nresult: the remote results for the node
2248
    @param nimg: the node image object
2249
    @param vg_name: the configured VG name
2250

2251
    """
2252
    node = ninfo.name
2253
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2254

    
2255
    nimg.lvm_fail = True
2256
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2257
    if vg_name is None:
2258
      pass
2259
    elif isinstance(lvdata, basestring):
2260
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2261
               utils.SafeEncode(lvdata))
2262
    elif not isinstance(lvdata, dict):
2263
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2264
    else:
2265
      nimg.volumes = lvdata
2266
      nimg.lvm_fail = False
2267

    
2268
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2269
    """Verifies and updates the node instance list.
2270

2271
    If the listing was successful, then updates this node's instance
2272
    list. Otherwise, it marks the RPC call as failed for the instance
2273
    list key.
2274

2275
    @type ninfo: L{objects.Node}
2276
    @param ninfo: the node to check
2277
    @param nresult: the remote results for the node
2278
    @param nimg: the node image object
2279

2280
    """
2281
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2282
    test = not isinstance(idata, list)
2283
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2284
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2285
    if test:
2286
      nimg.hyp_fail = True
2287
    else:
2288
      nimg.instances = idata
2289

    
2290
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2291
    """Verifies and computes a node information map
2292

2293
    @type ninfo: L{objects.Node}
2294
    @param ninfo: the node to check
2295
    @param nresult: the remote results for the node
2296
    @param nimg: the node image object
2297
    @param vg_name: the configured VG name
2298

2299
    """
2300
    node = ninfo.name
2301
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2302

    
2303
    # try to read free memory (from the hypervisor)
2304
    hv_info = nresult.get(constants.NV_HVINFO, None)
2305
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2306
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2307
    if not test:
2308
      try:
2309
        nimg.mfree = int(hv_info["memory_free"])
2310
      except (ValueError, TypeError):
2311
        _ErrorIf(True, self.ENODERPC, node,
2312
                 "node returned invalid nodeinfo, check hypervisor")
2313

    
2314
    # FIXME: devise a free space model for file based instances as well
2315
    if vg_name is not None:
2316
      test = (constants.NV_VGLIST not in nresult or
2317
              vg_name not in nresult[constants.NV_VGLIST])
2318
      _ErrorIf(test, self.ENODELVM, node,
2319
               "node didn't return data for the volume group '%s'"
2320
               " - it is either missing or broken", vg_name)
2321
      if not test:
2322
        try:
2323
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2324
        except (ValueError, TypeError):
2325
          _ErrorIf(True, self.ENODERPC, node,
2326
                   "node returned invalid LVM info, check LVM status")
2327

    
2328
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2329
    """Gets per-disk status information for all instances.
2330

2331
    @type nodelist: list of strings
2332
    @param nodelist: Node names
2333
    @type node_image: dict of (name, L{objects.Node})
2334
    @param node_image: Node objects
2335
    @type instanceinfo: dict of (name, L{objects.Instance})
2336
    @param instanceinfo: Instance objects
2337
    @rtype: {instance: {node: [(success, payload)]}}
2338
    @return: a dictionary of per-instance dictionaries with nodes as
2339
        keys and disk information as values; the disk information is a
2340
        list of tuples (success, payload)
2341

2342
    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2344

    
2345
    node_disks = {}
2346
    node_disks_devonly = {}
2347
    diskless_instances = set()
2348
    diskless = constants.DT_DISKLESS
2349

    
2350
    for nname in nodelist:
2351
      node_instances = list(itertools.chain(node_image[nname].pinst,
2352
                                            node_image[nname].sinst))
2353
      diskless_instances.update(inst for inst in node_instances
2354
                                if instanceinfo[inst].disk_template == diskless)
2355
      disks = [(inst, disk)
2356
               for inst in node_instances
2357
               for disk in instanceinfo[inst].disks]
2358

    
2359
      if not disks:
2360
        # No need to collect data
2361
        continue
2362

    
2363
      node_disks[nname] = disks
2364

    
2365
      # Creating copies as SetDiskID below will modify the objects and that can
2366
      # lead to incorrect data returned from nodes
2367
      devonly = [dev.Copy() for (_, dev) in disks]
2368

    
2369
      for dev in devonly:
2370
        self.cfg.SetDiskID(dev, nname)
2371

    
2372
      node_disks_devonly[nname] = devonly
2373

    
2374
    assert len(node_disks) == len(node_disks_devonly)
2375

    
2376
    # Collect data from all nodes with disks
2377
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2378
                                                          node_disks_devonly)
2379

    
2380
    assert len(result) == len(node_disks)
2381

    
2382
    instdisk = {}
2383

    
2384
    for (nname, nres) in result.items():
2385
      disks = node_disks[nname]
2386

    
2387
      if nres.offline:
2388
        # No data from this node
2389
        data = len(disks) * [(False, "node offline")]
2390
      else:
2391
        msg = nres.fail_msg
2392
        _ErrorIf(msg, self.ENODERPC, nname,
2393
                 "while getting disk information: %s", msg)
2394
        if msg:
2395
          # No data from this node
2396
          data = len(disks) * [(False, msg)]
2397
        else:
2398
          data = []
2399
          for idx, i in enumerate(nres.payload):
2400
            if isinstance(i, (tuple, list)) and len(i) == 2:
2401
              data.append(i)
2402
            else:
2403
              logging.warning("Invalid result from node %s, entry %d: %s",
2404
                              nname, idx, i)
2405
              data.append((False, "Invalid result from the remote node"))
2406

    
2407
      for ((inst, _), status) in zip(disks, data):
2408
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2409

    
2410
    # Add empty entries for diskless instances.
2411
    for inst in diskless_instances:
2412
      assert inst not in instdisk
2413
      instdisk[inst] = {}
2414

    
2415
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2416
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2417
                      compat.all(isinstance(s, (tuple, list)) and
2418
                                 len(s) == 2 for s in statuses)
2419
                      for inst, nnames in instdisk.items()
2420
                      for nname, statuses in nnames.items())
2421
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2422

    
2423
    return instdisk
2424

    
2425
  def BuildHooksEnv(self):
2426
    """Build hooks env.
2427

2428
    Cluster-Verify hooks are run only in the post phase; a failing hook has
    its output logged in the verify output and makes the verification fail.
2430

2431
    """
2432
    env = {
2433
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2434
      }
2435

    
2436
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2437
               for node in self.my_node_info.values())
2438

    
2439
    return env
2440

    
2441
  def BuildHooksNodes(self):
2442
    """Build hooks nodes.
2443

2444
    """
2445
    return ([], self.my_node_names)
2446

    
2447
  def Exec(self, feedback_fn):
2448
    """Verify integrity of the node group, performing various test on nodes.
2449

2450
    """
2451
    # This method has too many local variables. pylint: disable-msg=R0914
2452

    
2453
    if not self.my_node_names:
2454
      # empty node group
2455
      feedback_fn("* Empty node group, skipping verification")
2456
      return True
2457

    
2458
    self.bad = False
2459
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2460
    verbose = self.op.verbose
2461
    self._feedback_fn = feedback_fn
2462

    
2463
    vg_name = self.cfg.GetVGName()
2464
    drbd_helper = self.cfg.GetDRBDHelper()
2465
    cluster = self.cfg.GetClusterInfo()
2466
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2467
    hypervisors = cluster.enabled_hypervisors
2468
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2469

    
2470
    i_non_redundant = [] # Non redundant instances
2471
    i_non_a_balanced = [] # Non auto-balanced instances
2472
    n_offline = 0 # Count of offline nodes
2473
    n_drained = 0 # Count of nodes being drained
2474
    node_vol_should = {}
2475

    
2476
    # FIXME: verify OS list
2477

    
2478
    # File verification
2479
    filemap = _ComputeAncillaryFiles(cluster, False)
2480

    
2481
    # do local checksums
2482
    master_node = self.master_node = self.cfg.GetMasterNode()
2483
    master_ip = self.cfg.GetMasterIP()
2484

    
2485
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2486

    
2487
    # We will make nodes contact all nodes in their group, and one node from
2488
    # every other group.
2489
    # TODO: should it be a *random* node, different every time?
2490
    online_nodes = [node.name for node in node_data_list if not node.offline]
2491
    other_group_nodes = {}
2492

    
2493
    for name in sorted(self.all_node_info):
2494
      node = self.all_node_info[name]
2495
      if (node.group not in other_group_nodes
2496
          and node.group != self.group_uuid
2497
          and not node.offline):
2498
        other_group_nodes[node.group] = node.name
2499

    
2500
    node_verify_param = {
2501
      constants.NV_FILELIST:
2502
        utils.UniqueSequence(filename
2503
                             for files in filemap
2504
                             for filename in files),
2505
      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2506
      constants.NV_HYPERVISOR: hypervisors,
2507
      constants.NV_HVPARAMS:
2508
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2509
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2510
                                 for node in node_data_list
2511
                                 if not node.offline],
2512
      constants.NV_INSTANCELIST: hypervisors,
2513
      constants.NV_VERSION: None,
2514
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2515
      constants.NV_NODESETUP: None,
2516
      constants.NV_TIME: None,
2517
      constants.NV_MASTERIP: (master_node, master_ip),
2518
      constants.NV_OSLIST: None,
2519
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2520
      }
2521

    
2522
    if vg_name is not None:
2523
      node_verify_param[constants.NV_VGLIST] = None
2524
      node_verify_param[constants.NV_LVLIST] = vg_name
2525
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2526
      node_verify_param[constants.NV_DRBDLIST] = None
2527

    
2528
    if drbd_helper:
2529
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2530

    
2531
    # bridge checks
2532
    # FIXME: this needs to be changed per node-group, not cluster-wide
2533
    bridges = set()
2534
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2535
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2536
      bridges.add(default_nicpp[constants.NIC_LINK])
2537
    for instance in self.my_inst_info.values():
2538
      for nic in instance.nics:
2539
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2540
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2541
          bridges.add(full_nic[constants.NIC_LINK])
2542

    
2543
    if bridges:
2544
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2545

    
2546
    # Build our expected cluster state
2547
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2548
                                                 name=node.name,
2549
                                                 vm_capable=node.vm_capable))
2550
                      for node in node_data_list)
2551

    
2552
    # Gather OOB paths
2553
    oob_paths = []
2554
    for node in self.all_node_info.values():
2555
      path = _SupportsOob(self.cfg, node)
2556
      if path and path not in oob_paths:
2557
        oob_paths.append(path)
2558

    
2559
    if oob_paths:
2560
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2561

    
2562
    for instance in self.my_inst_names:
2563
      inst_config = self.my_inst_info[instance]
2564

    
2565
      for nname in inst_config.all_nodes:
2566
        if nname not in node_image:
2567
          gnode = self.NodeImage(name=nname)
2568
          gnode.ghost = (nname not in self.all_node_info)
2569
          node_image[nname] = gnode
2570

    
2571
      inst_config.MapLVsByNode(node_vol_should)
2572

    
2573
      pnode = inst_config.primary_node
2574
      node_image[pnode].pinst.append(instance)
2575

    
2576
      for snode in inst_config.secondary_nodes:
2577
        nimg = node_image[snode]
2578
        nimg.sinst.append(instance)
2579
        if pnode not in nimg.sbp:
2580
          nimg.sbp[pnode] = []
2581
        nimg.sbp[pnode].append(instance)
2582

    
2583
    # At this point, we have the in-memory data structures complete,
2584
    # except for the runtime information, which we'll gather next
2585

    
2586
    # Due to the way our RPC system works, exact response times cannot be
2587
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2588
    # time before and after executing the request, we can at least have a time
2589
    # window.
2590
    nvinfo_starttime = time.time()
2591
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2592
                                           node_verify_param,
2593
                                           self.cfg.GetClusterName())
2594
    nvinfo_endtime = time.time()
2595

    
2596
    if self.extra_lv_nodes and vg_name is not None:
2597
      extra_lv_nvinfo = \
2598
          self.rpc.call_node_verify(self.extra_lv_nodes,
2599
                                    {constants.NV_LVLIST: vg_name},
2600
                                    self.cfg.GetClusterName())
2601
    else:
2602
      extra_lv_nvinfo = {}
2603

    
2604
    all_drbd_map = self.cfg.ComputeDRBDMap()
2605

    
2606
    feedback_fn("* Gathering disk information (%s nodes)" %
2607
                len(self.my_node_names))
2608
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2609
                                     self.my_inst_info)
2610

    
2611
    feedback_fn("* Verifying configuration file consistency")
2612

    
2613
    # If not all nodes are being checked, we need to make sure the master node
2614
    # and a non-checked vm_capable node are in the list.
2615
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2616
    if absent_nodes:
2617
      vf_nvinfo = all_nvinfo.copy()
2618
      vf_node_info = list(self.my_node_info.values())
2619
      additional_nodes = []
2620
      if master_node not in self.my_node_info:
2621
        additional_nodes.append(master_node)
2622
        vf_node_info.append(self.all_node_info[master_node])
2623
      # Add the first vm_capable node we find which is not included
2624
      for node in absent_nodes:
2625
        nodeinfo = self.all_node_info[node]
2626
        if nodeinfo.vm_capable and not nodeinfo.offline:
2627
          additional_nodes.append(node)
2628
          vf_node_info.append(self.all_node_info[node])
2629
          break
2630
      key = constants.NV_FILELIST
2631
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2632
                                                 {key: node_verify_param[key]},
2633
                                                 self.cfg.GetClusterName()))
2634
    else:
2635
      vf_nvinfo = all_nvinfo
2636
      vf_node_info = self.my_node_info.values()
2637

    
2638
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2639

    
2640
    feedback_fn("* Verifying node status")
2641

    
2642
    refos_img = None
2643

    
2644
    for node_i in node_data_list:
2645
      node = node_i.name
2646
      nimg = node_image[node]
2647

    
2648
      if node_i.offline:
2649
        if verbose:
2650
          feedback_fn("* Skipping offline node %s" % (node,))
2651
        n_offline += 1
2652
        continue
2653

    
2654
      if node == master_node:
2655
        ntype = "master"
2656
      elif node_i.master_candidate:
2657
        ntype = "master candidate"
2658
      elif node_i.drained:
2659
        ntype = "drained"
2660
        n_drained += 1
2661
      else:
2662
        ntype = "regular"
2663
      if verbose:
2664
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2665

    
2666
      msg = all_nvinfo[node].fail_msg
2667
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2668
      if msg:
2669
        nimg.rpc_fail = True
2670
        continue
2671

    
2672
      nresult = all_nvinfo[node].payload
2673

    
2674
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2675
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2676
      self._VerifyNodeNetwork(node_i, nresult)
2677
      self._VerifyOob(node_i, nresult)
2678

    
2679
      if nimg.vm_capable:
2680
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2681
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2682
                             all_drbd_map)
2683

    
2684
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2685
        self._UpdateNodeInstances(node_i, nresult, nimg)
2686
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2687
        self._UpdateNodeOS(node_i, nresult, nimg)
2688

    
2689
        if not nimg.os_fail:
2690
          if refos_img is None:
2691
            refos_img = nimg
2692
          self._VerifyNodeOS(node_i, nimg, refos_img)
2693
        self._VerifyNodeBridges(node_i, nresult, bridges)
2694

    
2695
        # Check whether all running instances are primary for the node. (This
2696
        # can no longer be done from _VerifyInstance below, since some of the
2697
        # wrong instances could be from other node groups.)
2698
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2699

    
2700
        for inst in non_primary_inst:
2701
          test = inst in self.all_inst_info
2702
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2703
                   "instance should not run on node %s", node_i.name)
2704
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2705
                   "node is running unknown instance %s", inst)
2706

    
2707
    for node, result in extra_lv_nvinfo.items():
2708
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2709
                              node_image[node], vg_name)
2710

    
2711
    feedback_fn("* Verifying instance status")
2712
    for instance in self.my_inst_names:
2713
      if verbose:
2714
        feedback_fn("* Verifying instance %s" % instance)
2715
      inst_config = self.my_inst_info[instance]
2716
      self._VerifyInstance(instance, inst_config, node_image,
2717
                           instdisk[instance])
2718
      inst_nodes_offline = []
2719

    
2720
      pnode = inst_config.primary_node
2721
      pnode_img = node_image[pnode]
2722
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2723
               self.ENODERPC, pnode, "instance %s, connection to"
2724
               " primary node failed", instance)
2725

    
2726
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2727
               self.EINSTANCEBADNODE, instance,
2728
               "instance is marked as running and lives on offline node %s",
2729
               inst_config.primary_node)
2730

    
2731
      # If the instance is non-redundant we cannot survive losing its primary
2732
      # node, so we are not N+1 compliant. On the other hand we have no disk
2733
      # templates with more than one secondary so that situation is not well
2734
      # supported either.
2735
      # FIXME: does not support file-backed instances
2736
      if not inst_config.secondary_nodes:
2737
        i_non_redundant.append(instance)
2738

    
2739
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2740
               instance, "instance has multiple secondary nodes: %s",
2741
               utils.CommaJoin(inst_config.secondary_nodes),
2742
               code=self.ETYPE_WARNING)
2743

    
2744
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2745
        pnode = inst_config.primary_node
2746
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2747
        instance_groups = {}
2748

    
2749
        for node in instance_nodes:
2750
          instance_groups.setdefault(self.all_node_info[node].group,
2751
                                     []).append(node)
2752

    
2753
        pretty_list = [
2754
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2755
          # Sort so that we always list the primary node first.
2756
          for group, nodes in sorted(instance_groups.items(),
2757
                                     key=lambda (_, nodes): pnode in nodes,
2758
                                     reverse=True)]
2759

    
2760
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2761
                      instance, "instance has primary and secondary nodes in"
2762
                      " different groups: %s", utils.CommaJoin(pretty_list),
2763
                      code=self.ETYPE_WARNING)
2764

    
2765
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2766
        i_non_a_balanced.append(instance)
2767

    
2768
      for snode in inst_config.secondary_nodes:
2769
        s_img = node_image[snode]
2770
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2771
                 "instance %s, connection to secondary node failed", instance)
2772

    
2773
        if s_img.offline:
2774
          inst_nodes_offline.append(snode)
2775

    
2776
      # warn that the instance lives on offline nodes
2777
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2778
               "instance has offline secondary node(s) %s",
2779
               utils.CommaJoin(inst_nodes_offline))
2780
      # ... or ghost/non-vm_capable nodes
2781
      for node in inst_config.all_nodes:
2782
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2783
                 "instance lives on ghost node %s", node)
2784
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2785
                 instance, "instance lives on non-vm_capable node %s", node)
2786

    
2787
    feedback_fn("* Verifying orphan volumes")
2788
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2789

    
2790
    # We will get spurious "unknown volume" warnings if any node of this group
2791
    # is secondary for an instance whose primary is in another group. To avoid
2792
    # them, we find these instances and add their volumes to node_vol_should.
2793
    for inst in self.all_inst_info.values():
2794
      for secondary in inst.secondary_nodes:
2795
        if (secondary in self.my_node_info
2796
            and inst.name not in self.my_inst_info):
2797
          inst.MapLVsByNode(node_vol_should)
2798
          break
2799

    
2800
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2801

    
2802
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2803
      feedback_fn("* Verifying N+1 Memory redundancy")
2804
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2805

    
2806
    feedback_fn("* Other Notes")
2807
    if i_non_redundant:
2808
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2809
                  % len(i_non_redundant))
2810

    
2811
    if i_non_a_balanced:
2812
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2813
                  % len(i_non_a_balanced))
2814

    
2815
    if n_offline:
2816
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2817

    
2818
    if n_drained:
2819
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2820

    
2821
    return not self.bad
2822

    
2823
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2824
    """Analyze the post-hooks' result
2825

2826
    This method analyses the hook result, handles it, and sends some
2827
    nicely-formatted feedback back to the user.
2828

2829
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2830
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2831
    @param hooks_results: the results of the multi-node hooks rpc call
2832
    @param feedback_fn: function used to send feedback back to the caller
2833
    @param lu_result: previous Exec result
2834
    @return: the new Exec result, based on the previous result
2835
        and hook results
2836

2837
    """
2838
    # We only really run POST phase hooks, only for non-empty groups,
2839
    # and are only interested in their results
2840
    if not self.my_node_names:
2841
      # empty node group
2842
      pass
2843
    elif phase == constants.HOOKS_PHASE_POST:
2844
      # Used to change hooks' output to proper indentation
2845
      feedback_fn("* Hooks Results")
2846
      assert hooks_results, "invalid result from hooks"
2847

    
2848
      for node_name in hooks_results:
2849
        res = hooks_results[node_name]
2850
        msg = res.fail_msg
2851
        test = msg and not res.offline
2852
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2853
                      "Communication failure in hooks execution: %s", msg)
2854
        if res.offline or msg:
2855
          # No need to investigate payload if node is offline or gave an error.
2856
          # override manually lu_result here as _ErrorIf only
2857
          # overrides self.bad
2858
          lu_result = 1
2859
          continue
2860
        for script, hkr, output in res.payload:
2861
          test = hkr == constants.HKR_FAIL
2862
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2863
                        "Script %s failed, output:", script)
2864
          if test:
2865
            output = self._HOOKS_INDENT_RE.sub("      ", output)
2866
            feedback_fn("%s" % output)
2867
            lu_result = 0
2868

    
2869
    return lu_result
2870

    
2871

    
2872
class LUClusterVerifyDisks(NoHooksLU):
2873
  """Verifies the cluster disks status.
2874

2875
  """
2876
  REQ_BGL = False
2877

    
2878
  def ExpandNames(self):
2879
    self.share_locks = _ShareAll()
2880
    self.needed_locks = {
2881
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
2882
      }
2883

    
2884
  def Exec(self, feedback_fn):
2885
    group_names = self.glm.list_owned(locking.LEVEL_NODEGROUP)
2886

    
2887
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
2888
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
2889
                           for group in group_names])
2890

    
2891

    
2892
class LUGroupVerifyDisks(NoHooksLU):
2893
  """Verifies the status of all disks in a node group.
2894

2895
  """
2896
  REQ_BGL = False
2897

    
2898
  def ExpandNames(self):
2899
    # Raises errors.OpPrereqError on its own if group can't be found
2900
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2901

    
2902
    self.share_locks = _ShareAll()
2903
    self.needed_locks = {
2904
      locking.LEVEL_INSTANCE: [],
2905
      locking.LEVEL_NODEGROUP: [],
2906
      locking.LEVEL_NODE: [],
2907
      }
2908

    
2909
  def DeclareLocks(self, level):
2910
    if level == locking.LEVEL_INSTANCE:
2911
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
2912

    
2913
      # Lock instances optimistically, needs verification once node and group
2914
      # locks have been acquired
2915
      self.needed_locks[locking.LEVEL_INSTANCE] = \
2916
        self.cfg.GetNodeGroupInstances(self.group_uuid)
2917

    
2918
    elif level == locking.LEVEL_NODEGROUP:
2919
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
2920

    
2921
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
2922
        set([self.group_uuid] +
2923
            # Lock all groups used by instances optimistically; this requires
2924
            # going via the node before it's locked, requiring verification
2925
            # later on
2926
            [group_uuid
2927
             for instance_name in
2928
               self.glm.list_owned(locking.LEVEL_INSTANCE)
2929
             for group_uuid in
2930
               self.cfg.GetInstanceNodeGroups(instance_name)])
2931

    
2932
    elif level == locking.LEVEL_NODE:
2933
      # This will only lock the nodes in the group to be verified which contain
2934
      # actual instances
2935
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
2936
      self._LockInstancesNodes()
2937

    
2938
      # Lock all nodes in group to be verified
2939
      assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
2940
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
2941
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
2942

    
2943
  def CheckPrereq(self):
2944
    owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
2945
    owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
2946
    owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))
2947

    
2948
    assert self.group_uuid in owned_groups
2949

    
2950
    # Check if locked instances are still correct
2951
    wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2952
    if owned_instances != wanted_instances:
2953
      raise errors.OpPrereqError("Instances in node group %s changed since"
2954
                                 " locks were acquired, wanted %s, have %s;"
2955
                                 " retry the operation" %
2956
                                 (self.op.group_name,
2957
                                  utils.CommaJoin(wanted_instances),
2958
                                  utils.CommaJoin(owned_instances)),
2959
                                 errors.ECODE_STATE)
2960

    
2961
    # Get instance information
2962
    self.instances = dict((name, self.cfg.GetInstanceInfo(name))
2963
                          for name in owned_instances)
2964

    
2965
    # Check if node groups for locked instances are still correct
2966
    for (instance_name, inst) in self.instances.items():
2967
      assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
2968
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
2969
      assert owned_nodes.issuperset(inst.all_nodes), \
2970
        "Instance %s's nodes changed while we kept the lock" % instance_name
2971

    
2972
      inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
2973
      if not owned_groups.issuperset(inst_groups):
2974
        raise errors.OpPrereqError("Instance %s's node groups changed since"
2975
                                   " locks were acquired, current groups are"
2976
                                   " are '%s', owning groups '%s'; retry the"
2977
                                   " operation" %
2978
                                   (instance_name,
2979
                                    utils.CommaJoin(inst_groups),
2980
                                    utils.CommaJoin(owned_groups)),
2981
                                   errors.ECODE_STATE)
2982

    
2983
  def Exec(self, feedback_fn):
2984
    """Verify integrity of cluster disks.
2985

2986
    @rtype: tuple of three items
2987
    @return: a tuple of (dict of node-to-node_error, list of instances
2988
        which need activate-disks, dict of instance: (node, volume) for
2989
        missing volumes)
2990

2991
    """
2992
    res_nodes = {}
2993
    res_instances = set()
2994
    res_missing = {}
2995

    
2996
    nv_dict = _MapInstanceDisksToNodes([inst
2997
                                        for inst in self.instances.values()
2998
                                        if inst.admin_up])
2999

    
3000
    if nv_dict:
3001
      nodes = utils.NiceSort(set(self.glm.list_owned(locking.LEVEL_NODE)) &
3002
                             set(self.cfg.GetVmCapableNodeList()))
3003

    
3004
      node_lvs = self.rpc.call_lv_list(nodes, [])
3005

    
3006
      for (node, node_res) in node_lvs.items():
3007
        if node_res.offline:
3008
          continue
3009

    
3010
        msg = node_res.fail_msg
3011
        if msg:
3012
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3013
          res_nodes[node] = msg
3014
          continue
3015

    
3016
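        # the payload maps each LV name to a status tuple; only the last
        # field (whether the LV is online) is of interest here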
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3017
          inst = nv_dict.pop((node, lv_name), None)
3018
          if not (lv_online or inst is None):
3019
            res_instances.add(inst)
3020

    
3021
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3022
      # better
3023
      for key, inst in nv_dict.iteritems():
3024
        res_missing.setdefault(inst, []).append(key)
3025

    
3026
    return (res_nodes, list(res_instances), res_missing)
3027

    
3028

    
3029
class LUClusterRepairDiskSizes(NoHooksLU):
3030
  """Verifies the cluster disks sizes.
3031

3032
  """
3033
  REQ_BGL = False
3034

    
3035
  def ExpandNames(self):
3036
    if self.op.instances:
3037
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3038
      self.needed_locks = {
3039
        locking.LEVEL_NODE: [],
3040
        locking.LEVEL_INSTANCE: self.wanted_names,
3041
        }
3042
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3043
    else:
3044
      self.wanted_names = None
3045
      self.needed_locks = {
3046
        locking.LEVEL_NODE: locking.ALL_SET,
3047
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3048
        }
3049
    self.share_locks = _ShareAll()
3050

    
3051
  def DeclareLocks(self, level):
3052
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3053
      self._LockInstancesNodes(primary_only=True)
3054

    
3055
  def CheckPrereq(self):
3056
    """Check prerequisites.
3057

3058
    This only checks the optional instance list against the existing names.
3059

3060
    """
3061
    if self.wanted_names is None:
3062
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
3063

    
3064
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
3065
                             in self.wanted_names]
3066

    
3067
  def _EnsureChildSizes(self, disk):
3068
    """Ensure children of the disk have the needed disk size.
3069

3070
    This is valid mainly for DRBD8 and fixes an issue where the
3071
    children have a smaller disk size than the parent.
3072

3073
    @param disk: an L{ganeti.objects.Disk} object
3074

3075
    """
3076
    if disk.dev_type == constants.LD_DRBD8:
3077
      assert disk.children, "Empty children for DRBD8?"
3078
      fchild = disk.children[0]
3079
      mismatch = fchild.size < disk.size
3080
      if mismatch:
3081
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3082
                     fchild.size, disk.size)
3083
        fchild.size = disk.size
3084

    
3085
      # and we recurse on this child only, not on the metadev
3086
      return self._EnsureChildSizes(fchild) or mismatch
3087
    else:
3088
      return False
3089

    
3090
  def Exec(self, feedback_fn):
3091
    """Verify the size of cluster disks.
3092

3093
    """
3094
    # TODO: check child disks too
3095
    # TODO: check differences in size between primary/secondary nodes
3096
    per_node_disks = {}
3097
    for instance in self.wanted_instances:
3098
      pnode = instance.primary_node
3099
      if pnode not in per_node_disks:
3100
        per_node_disks[pnode] = []
3101
      for idx, disk in enumerate(instance.disks):
3102
        per_node_disks[pnode].append((instance, idx, disk))
3103

    
3104
    changed = []
3105
    for node, dskl in per_node_disks.items():
3106
      newl = [v[2].Copy() for v in dskl]
3107
      for dsk in newl:
3108
        self.cfg.SetDiskID(dsk, node)
3109
      result = self.rpc.call_blockdev_getsize(node, newl)
3110
      if result.fail_msg:
3111
        self.LogWarning("Failure in blockdev_getsize call to node"
3112
                        " %s, ignoring", node)
3113
        continue
3114
      if len(result.payload) != len(dskl):
3115
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3116
                        " result.payload=%s", node, len(dskl), result.payload)
3117
        self.LogWarning("Invalid result from node %s, ignoring node results",
3118
                        node)
3119
        continue
3120
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3121
        if size is None:
3122
          self.LogWarning("Disk %d of instance %s did not return size"
3123
                          " information, ignoring", idx, instance.name)
3124
          continue
3125
        if not isinstance(size, (int, long)):
3126
          self.LogWarning("Disk %d of instance %s did not return valid"
3127
                          " size information, ignoring", idx, instance.name)
3128
          continue
3129
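        # the node reports the size in bytes; shift to MiB to match disk.size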
        size = size >> 20
3130
        if size != disk.size:
3131
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3132
                       " correcting: recorded %d, actual %d", idx,
3133
                       instance.name, disk.size, size)
3134
          disk.size = size
3135
          self.cfg.Update(instance, feedback_fn)
3136
          changed.append((instance.name, idx, size))
3137
        if self._EnsureChildSizes(disk):
3138
          self.cfg.Update(instance, feedback_fn)
3139
          changed.append((instance.name, idx, disk.size))
3140
    return changed
3141

    
3142

    
3143
class LUClusterRename(LogicalUnit):
3144
  """Rename the cluster.
3145

3146
  """
3147
  HPATH = "cluster-rename"
3148
  HTYPE = constants.HTYPE_CLUSTER
3149

    
3150
  def BuildHooksEnv(self):
3151
    """Build hooks env.
3152

3153
    """
3154
    return {
3155
      "OP_TARGET": self.cfg.GetClusterName(),
3156
      "NEW_NAME": self.op.name,
3157
      }
3158

    
3159
  def BuildHooksNodes(self):
3160
    """Build hooks nodes.
3161

3162
    """
3163
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3164

    
3165
  def CheckPrereq(self):
3166
    """Verify that the passed name is a valid one.
3167

3168
    """
3169
    hostname = netutils.GetHostname(name=self.op.name,
3170
                                    family=self.cfg.GetPrimaryIPFamily())
3171

    
3172
    new_name = hostname.name
3173
    self.ip = new_ip = hostname.ip
3174
    old_name = self.cfg.GetClusterName()
3175
    old_ip = self.cfg.GetMasterIP()
3176
    if new_name == old_name and new_ip == old_ip:
3177
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3178
                                 " cluster has changed",
3179
                                 errors.ECODE_INVAL)
3180
    if new_ip != old_ip:
3181
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3182
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3183
                                   " reachable on the network" %
3184
                                   new_ip, errors.ECODE_NOTUNIQUE)
3185

    
3186
    self.op.name = new_name
3187

    
3188
  def Exec(self, feedback_fn):
3189
    """Rename the cluster.
3190

3191
    """
3192
    clustername = self.op.name
3193
    ip = self.ip
3194

    
3195
    # shutdown the master IP
3196
    master = self.cfg.GetMasterNode()
3197
    result = self.rpc.call_node_stop_master(master, False)
3198
    result.Raise("Could not disable the master role")
3199

    
3200
    try:
3201
      cluster = self.cfg.GetClusterInfo()
3202
      cluster.cluster_name = clustername
3203
      cluster.master_ip = ip
3204
      self.cfg.Update(cluster, feedback_fn)
3205

    
3206
      # update the known hosts file
3207
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3208
      node_list = self.cfg.GetOnlineNodeList()
3209
      try:
3210
        node_list.remove(master)
3211
      except ValueError:
3212
        pass
3213
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3214
    finally:
3215
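      # always try to bring the master IP back up, even if the steps above
      # have failed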
      result = self.rpc.call_node_start_master(master, False, False)
3216
      msg = result.fail_msg
3217
      if msg:
3218
        self.LogWarning("Could not re-enable the master role on"
3219
                        " the master, please restart manually: %s", msg)
3220

    
3221
    return clustername
3222

    
3223

    
3224
class LUClusterSetParams(LogicalUnit):
3225
  """Change the parameters of the cluster.
3226

3227
  """
3228
  HPATH = "cluster-modify"
3229
  HTYPE = constants.HTYPE_CLUSTER
3230
  REQ_BGL = False
3231

    
3232
  def CheckArguments(self):
3233
    """Check parameters
3234

3235
    """
3236
    if self.op.uid_pool:
3237
      uidpool.CheckUidPool(self.op.uid_pool)
3238

    
3239
    if self.op.add_uids:
3240
      uidpool.CheckUidPool(self.op.add_uids)
3241

    
3242
    if self.op.remove_uids:
3243
      uidpool.CheckUidPool(self.op.remove_uids)
3244

    
3245
  def ExpandNames(self):
3246
    # FIXME: in the future maybe other cluster params won't require checking on
3247
    # all nodes to be modified.
3248
    self.needed_locks = {
3249
      locking.LEVEL_NODE: locking.ALL_SET,
3250
    }
3251
    self.share_locks[locking.LEVEL_NODE] = 1
3252

    
3253
  def BuildHooksEnv(self):
3254
    """Build hooks env.
3255

3256
    """
3257
    return {
3258
      "OP_TARGET": self.cfg.GetClusterName(),
3259
      "NEW_VG_NAME": self.op.vg_name,
3260
      }
3261

    
3262
  def BuildHooksNodes(self):
3263
    """Build hooks nodes.
3264

3265
    """
3266
    mn = self.cfg.GetMasterNode()
3267
    return ([mn], [mn])
3268

    
3269
  def CheckPrereq(self):
3270
    """Check prerequisites.
3271

3272
    This checks whether the given params don't conflict and
3273
    whether the given volume group is valid.
3274

3275
    """
3276
    if self.op.vg_name is not None and not self.op.vg_name:
3277
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3278
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3279
                                   " instances exist", errors.ECODE_INVAL)
3280

    
3281
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3282
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3283
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3284
                                   " drbd-based instances exist",
3285
                                   errors.ECODE_INVAL)
3286

    
3287
    node_list = self.glm.list_owned(locking.LEVEL_NODE)
3288

    
3289
    # if vg_name is not None, check the given volume group on all nodes
3290
    if self.op.vg_name:
3291
      vglist = self.rpc.call_vg_list(node_list)
3292
      for node in node_list:
3293
        msg = vglist[node].fail_msg
3294
        if msg:
3295
          # ignoring down node
3296
          self.LogWarning("Error while gathering data on node %s"
3297
                          " (ignoring node): %s", node, msg)
3298
          continue
3299
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3300
                                              self.op.vg_name,
3301
                                              constants.MIN_VG_SIZE)
3302
        if vgstatus:
3303
          raise errors.OpPrereqError("Error on node '%s': %s" %
3304
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3305

    
3306
    if self.op.drbd_helper:
3307
      # checks given drbd helper on all nodes
3308
      helpers = self.rpc.call_drbd_helper(node_list)
3309
      for node in node_list:
3310
        ninfo = self.cfg.GetNodeInfo(node)
3311
        if ninfo.offline:
3312
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3313
          continue
3314
        msg = helpers[node].fail_msg
3315
        if msg:
3316
          raise errors.OpPrereqError("Error checking drbd helper on node"
3317
                                     " '%s': %s" % (node, msg),
3318
                                     errors.ECODE_ENVIRON)
3319
        node_helper = helpers[node].payload
3320
        if node_helper != self.op.drbd_helper:
3321
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3322
                                     (node, node_helper), errors.ECODE_ENVIRON)
3323

    
3324
    self.cluster = cluster = self.cfg.GetClusterInfo()
3325
    # validate params changes
3326
    if self.op.beparams:
3327
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3328
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3329

    
3330
    if self.op.ndparams:
3331
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3332
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3333

    
3334
      # TODO: we need a more general way to handle resetting
3335
      # cluster-level parameters to default values
3336
      if self.new_ndparams["oob_program"] == "":
3337
        self.new_ndparams["oob_program"] = \
3338
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3339

    
3340
    if self.op.nicparams:
3341
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3342
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3343
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3344
      nic_errors = []
3345

    
3346
      # check all instances for consistency
3347
      for instance in self.cfg.GetAllInstancesInfo().values():
3348
        for nic_idx, nic in enumerate(instance.nics):
3349
          params_copy = copy.deepcopy(nic.nicparams)
3350
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3351

    
3352
          # check parameter syntax
3353
          try:
3354
            objects.NIC.CheckParameterSyntax(params_filled)
3355
          except errors.ConfigurationError, err:
3356
            nic_errors.append("Instance %s, nic/%d: %s" %
3357
                              (instance.name, nic_idx, err))
3358

    
3359
          # if we're moving instances to routed, check that they have an ip
3360
          target_mode = params_filled[constants.NIC_MODE]
3361
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3362
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3363
                              " address" % (instance.name, nic_idx))
3364
      if nic_errors:
3365
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3366
                                   "\n".join(nic_errors))
3367

    
3368
    # hypervisor list/parameters
3369
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3370
    if self.op.hvparams:
3371
      for hv_name, hv_dict in self.op.hvparams.items():
3372
        if hv_name not in self.new_hvparams:
3373
          self.new_hvparams[hv_name] = hv_dict
3374
        else:
3375
          self.new_hvparams[hv_name].update(hv_dict)
3376

    
3377
    # os hypervisor parameters
3378
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3379
    if self.op.os_hvp:
3380
      for os_name, hvs in self.op.os_hvp.items():
3381
        if os_name not in self.new_os_hvp:
3382
          self.new_os_hvp[os_name] = hvs
3383
        else:
3384
          for hv_name, hv_dict in hvs.items():
3385
            if hv_name not in self.new_os_hvp[os_name]:
3386
              self.new_os_hvp[os_name][hv_name] = hv_dict
3387
            else:
3388
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3389

    
3390
    # os parameters
3391
    self.new_osp = objects.FillDict(cluster.osparams, {})
3392
    if self.op.osparams:
3393
      for os_name, osp in self.op.osparams.items():
3394
        if os_name not in self.new_osp:
3395
          self.new_osp[os_name] = {}
3396

    
3397
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3398
                                                  use_none=True)
3399

    
3400
        if not self.new_osp[os_name]:
3401
          # we removed all parameters
3402
          del self.new_osp[os_name]
3403
        else:
3404
          # check the parameter validity (remote check)
3405
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3406
                         os_name, self.new_osp[os_name])
3407

    
3408
    # changes to the hypervisor list
3409
    if self.op.enabled_hypervisors is not None:
3410
      self.hv_list = self.op.enabled_hypervisors
3411
      for hv in self.hv_list:
3412
        # if the hypervisor doesn't already exist in the cluster
3413
        # hvparams, we initialize it to empty, and then (in both
3414
        # cases) we make sure to fill the defaults, as we might not
3415
        # have a complete defaults list if the hypervisor wasn't
3416
        # enabled before
3417
        if hv not in new_hvp:
3418
          new_hvp[hv] = {}
3419
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3420
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3421
    else:
3422
      self.hv_list = cluster.enabled_hypervisors
3423

    
3424
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3425
      # either the enabled list has changed, or the parameters have, validate
3426
      for hv_name, hv_params in self.new_hvparams.items():
3427
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3428
            (self.op.enabled_hypervisors and
3429
             hv_name in self.op.enabled_hypervisors)):
3430
          # either this is a new hypervisor, or its parameters have changed
3431
          hv_class = hypervisor.GetHypervisor(hv_name)
3432
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3433
          hv_class.CheckParameterSyntax(hv_params)
3434
          _CheckHVParams(self, node_list, hv_name, hv_params)
3435

    
3436
    if self.op.os_hvp:
3437
      # no need to check any newly-enabled hypervisors, since the
3438
      # defaults have already been checked in the above code-block
3439
      for os_name, os_hvp in self.new_os_hvp.items():
3440
        for hv_name, hv_params in os_hvp.items():
3441
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3442
          # we need to fill in the new os_hvp on top of the actual hv_p
3443
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3444
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3445
          hv_class = hypervisor.GetHypervisor(hv_name)
3446
          hv_class.CheckParameterSyntax(new_osp)
3447
          _CheckHVParams(self, node_list, hv_name, new_osp)
3448

    
3449
    if self.op.default_iallocator:
3450
      alloc_script = utils.FindFile(self.op.default_iallocator,
3451
                                    constants.IALLOCATOR_SEARCH_PATH,
3452
                                    os.path.isfile)
3453
      if alloc_script is None:
3454
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3455
                                   " specified" % self.op.default_iallocator,
3456
                                   errors.ECODE_INVAL)
3457

    
3458
  def Exec(self, feedback_fn):
3459
    """Change the parameters of the cluster.
3460

3461
    """
3462
    if self.op.vg_name is not None:
3463
      new_volume = self.op.vg_name
3464
      if not new_volume:
3465
        new_volume = None
3466
      if new_volume != self.cfg.GetVGName():
3467
        self.cfg.SetVGName(new_volume)
3468
      else:
3469
        feedback_fn("Cluster LVM configuration already in desired"
3470
                    " state, not changing")
3471
    if self.op.drbd_helper is not None:
3472
      new_helper = self.op.drbd_helper
3473
      if not new_helper:
3474
        new_helper = None
3475
      if new_helper != self.cfg.GetDRBDHelper():
3476
        self.cfg.SetDRBDHelper(new_helper)
3477
      else:
3478
        feedback_fn("Cluster DRBD helper already in desired state,"
3479
                    " not changing")
3480
    if self.op.hvparams:
3481
      self.cluster.hvparams = self.new_hvparams
3482
    if self.op.os_hvp:
3483
      self.cluster.os_hvp = self.new_os_hvp
3484
    if self.op.enabled_hypervisors is not None:
3485
      self.cluster.hvparams = self.new_hvparams
3486
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3487
    if self.op.beparams:
3488
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3489
    if self.op.nicparams:
3490
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3491
    if self.op.osparams:
3492
      self.cluster.osparams = self.new_osp
3493
    if self.op.ndparams:
3494
      self.cluster.ndparams = self.new_ndparams
3495

    
3496
    if self.op.candidate_pool_size is not None:
3497
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3498
      # we need to update the pool size here, otherwise the save will fail
3499
      _AdjustCandidatePool(self, [])
3500

    
3501
    if self.op.maintain_node_health is not None:
3502
      self.cluster.maintain_node_health = self.op.maintain_node_health
3503

    
3504
    if self.op.prealloc_wipe_disks is not None:
3505
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3506

    
3507
    if self.op.add_uids is not None:
3508
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3509

    
3510
    if self.op.remove_uids is not None:
3511
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3512

    
3513
    if self.op.uid_pool is not None:
3514
      self.cluster.uid_pool = self.op.uid_pool
3515

    
3516
    if self.op.default_iallocator is not None:
3517
      self.cluster.default_iallocator = self.op.default_iallocator
3518

    
3519
    if self.op.reserved_lvs is not None:
3520
      self.cluster.reserved_lvs = self.op.reserved_lvs
3521

    
3522
    def helper_os(aname, mods, desc):
3523
      desc += " OS list"
3524
      lst = getattr(self.cluster, aname)
3525
      for key, val in mods:
3526
        if key == constants.DDM_ADD:
3527
          if val in lst:
3528
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3529
          else:
3530
            lst.append(val)
3531
        elif key == constants.DDM_REMOVE:
3532
          if val in lst:
3533
            lst.remove(val)
3534
          else:
3535
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3536
        else:
3537
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3538

    
3539
    if self.op.hidden_os:
3540
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3541

    
3542
    if self.op.blacklisted_os:
3543
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3544

    
3545
    if self.op.master_netdev:
3546
      master = self.cfg.GetMasterNode()
3547
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3548
                  self.cluster.master_netdev)
3549
      result = self.rpc.call_node_stop_master(master, False)
3550
      result.Raise("Could not disable the master ip")
3551
      feedback_fn("Changing master_netdev from %s to %s" %
3552
                  (self.cluster.master_netdev, self.op.master_netdev))
3553
      self.cluster.master_netdev = self.op.master_netdev
3554

    
3555
    self.cfg.Update(self.cluster, feedback_fn)
3556

    
3557
    if self.op.master_netdev:
3558
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3559
                  self.op.master_netdev)
3560
      result = self.rpc.call_node_start_master(master, False, False)
3561
      if result.fail_msg:
3562
        self.LogWarning("Could not re-enable the master ip on"
3563
                        " the master, please restart manually: %s",
3564
                        result.fail_msg)
3565

    
3566

    
3567
def _UploadHelper(lu, nodes, fname):
3568
  """Helper for uploading a file and showing warnings.
3569

3570
  """
3571
  if os.path.exists(fname):
3572
    result = lu.rpc.call_upload_file(nodes, fname)
3573
    for to_node, to_result in result.items():
3574
      msg = to_result.fail_msg
3575
      if msg:
3576
        msg = ("Copy of file %s to node %s failed: %s" %
3577
               (fname, to_node, msg))
3578
        lu.proc.LogWarning(msg)
3579

    
3580

    
3581
def _ComputeAncillaryFiles(cluster, redist):
3582
  """Compute files external to Ganeti which need to be consistent.
3583

3584
  @type redist: boolean
3585
  @param redist: Whether to include files which need to be redistributed
3586

3587
  """
3588
  # Compute files for all nodes
3589
  files_all = set([
3590
    constants.SSH_KNOWN_HOSTS_FILE,
3591
    constants.CONFD_HMAC_KEY,
3592
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3593
    ])
3594

    
3595
  if not redist:
3596
    files_all.update(constants.ALL_CERT_FILES)
3597
    files_all.update(ssconf.SimpleStore().GetFileList())
3598

    
3599
  if cluster.modify_etc_hosts:
3600
    files_all.add(constants.ETC_HOSTS)
3601

    
3602
  # Files which must either exist on all nodes or on none
3603
  files_all_opt = set([
3604
    constants.RAPI_USERS_FILE,
3605
    ])
3606

    
3607
  # Files which should only be on master candidates
3608
  files_mc = set()
3609
  if not redist:
3610
    files_mc.add(constants.CLUSTER_CONF_FILE)
3611

    
3612
  # Files which should only be on VM-capable nodes
3613
  files_vm = set(filename
3614
    for hv_name in cluster.enabled_hypervisors
3615
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3616

    
3617
  # Filenames must be unique
3618
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3619
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3620
         "Found file listed in more than one file list"
3621

    
3622
  return (files_all, files_all_opt, files_mc, files_vm)
3623

    
3624

    
3625
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3626
  """Distribute additional files which are part of the cluster configuration.
3627

3628
  ConfigWriter takes care of distributing the config and ssconf files, but
3629
  there are more files which should be distributed to all nodes. This function
3630
  makes sure those are copied.
3631

3632
  @param lu: calling logical unit
3633
  @param additional_nodes: list of nodes not in the config to distribute to
3634
  @type additional_vm: boolean
3635
  @param additional_vm: whether the additional nodes are vm-capable or not
3636

3637
  """
3638
  # Gather target nodes
3639
  cluster = lu.cfg.GetClusterInfo()
3640
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3641

    
3642
  online_nodes = lu.cfg.GetOnlineNodeList()
3643
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3644

    
3645
  if additional_nodes is not None:
3646
    online_nodes.extend(additional_nodes)
3647
    if additional_vm:
3648
      vm_nodes.extend(additional_nodes)
3649

    
3650
  # Never distribute to master node
3651
  for nodelist in [online_nodes, vm_nodes]:
3652
    if master_info.name in nodelist:
3653
      nodelist.remove(master_info.name)
3654

    
3655
  # Gather file lists
3656
  (files_all, files_all_opt, files_mc, files_vm) = \
3657
    _ComputeAncillaryFiles(cluster, True)
3658

    
3659
  # Never re-distribute configuration file from here
3660
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3661
              constants.CLUSTER_CONF_FILE in files_vm)
3662
  assert not files_mc, "Master candidates not handled in this function"
3663

    
3664
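  # each entry pairs a list of target nodes with the set of files that
  # should be uploaded to them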
  filemap = [
3665
    (online_nodes, files_all),
3666
    (online_nodes, files_all_opt),
3667
    (vm_nodes, files_vm),
3668
    ]
3669

    
3670
  # Upload the files
3671
  for (node_list, files) in filemap:
3672
    for fname in files:
3673
      _UploadHelper(lu, node_list, fname)
3674

    
3675

    
3676
class LUClusterRedistConf(NoHooksLU):
3677
  """Force the redistribution of cluster configuration.
3678

3679
  This is a very simple LU.
3680

3681
  """
3682
  REQ_BGL = False
3683

    
3684
  def ExpandNames(self):
3685
    self.needed_locks = {
3686
      locking.LEVEL_NODE: locking.ALL_SET,
3687
    }
3688
    self.share_locks[locking.LEVEL_NODE] = 1
3689

    
3690
  def Exec(self, feedback_fn):
3691
    """Redistribute the configuration.
3692

3693
    """
3694
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3695
    _RedistributeAncillaryFiles(self)
3696

    
3697

    
3698
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3699
  """Sleep and poll for an instance's disk to sync.
3700

3701
  """
3702
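  # nothing to do if the instance has no disks at all, or if an explicit
  # (but empty) list of disks was requested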
  if not instance.disks or disks is not None and not disks:
3703
    return True
3704

    
3705
  disks = _ExpandCheckDisks(instance, disks)
3706

    
3707
  if not oneshot:
3708
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3709

    
3710
  node = instance.primary_node
3711

    
3712
  for dev in disks:
3713
    lu.cfg.SetDiskID(dev, node)
3714

    
3715
  # TODO: Convert to utils.Retry
3716

    
3717
  retries = 0
3718
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3719
  while True:
3720
    max_time = 0
3721
    done = True
3722
    cumul_degraded = False
3723
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3724
    msg = rstats.fail_msg
3725
    if msg:
3726
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3727
      retries += 1
3728
      if retries >= 10:
3729
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3730
                                 " aborting." % node)
3731
      time.sleep(6)
3732
      continue
3733
    rstats = rstats.payload
3734
    retries = 0
3735
    for i, mstat in enumerate(rstats):
3736
      if mstat is None:
3737
        lu.LogWarning("Can't compute data for node %s/%s",
3738
                           node, disks[i].iv_name)
3739
        continue
3740

    
3741
      cumul_degraded = (cumul_degraded or
3742
                        (mstat.is_degraded and mstat.sync_percent is None))
3743
      if mstat.sync_percent is not None:
3744
        done = False
3745
        if mstat.estimated_time is not None:
3746
          rem_time = ("%s remaining (estimated)" %
3747
                      utils.FormatSeconds(mstat.estimated_time))
3748
          max_time = mstat.estimated_time
3749
        else:
3750
          rem_time = "no time estimate"
3751
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3752
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3753

    
3754
    # if we're done but degraded, let's do a few small retries, to
3755
    # make sure we see a stable and not transient situation; therefore
3756
    # we force restart of the loop
3757
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3758
      logging.info("Degraded disks found, %d retries left", degr_retries)
3759
      degr_retries -= 1
3760
      time.sleep(1)
3761
      continue
3762

    
3763
    if done or oneshot:
3764
      break
3765

    
3766
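    # sleep between polls, but never more than a minute even when the
    # estimated sync time is longer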
    time.sleep(min(60, max_time))
3767

    
3768
  if done:
3769
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3770
  return not cumul_degraded
3771

    
3772

    
3773
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3774
  """Check that mirrors are not degraded.
3775

3776
  The ldisk parameter, if True, will change the test from the
3777
  is_degraded attribute (which represents overall non-ok status for
3778
  the device(s)) to the ldisk (representing the local storage status).
3779

3780
  """
3781
  lu.cfg.SetDiskID(dev, node)
3782

    
3783
  result = True
3784

    
3785
  if on_primary or dev.AssembleOnSecondary():
3786
    rstats = lu.rpc.call_blockdev_find(node, dev)
3787
    msg = rstats.fail_msg
3788
    if msg:
3789
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3790
      result = False
3791
    elif not rstats.payload:
3792
      lu.LogWarning("Can't find disk on node %s", node)
3793
      result = False
3794
    else:
3795
      if ldisk:
3796
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3797
      else:
3798
        result = result and not rstats.payload.is_degraded
3799

    
3800
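  # recurse into the children; note that they are always checked with the
  # default is_degraded test, the ldisk flag is not propagated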
  if dev.children:
3801
    for child in dev.children:
3802
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3803

    
3804
  return result
3805

    
3806

    
3807
class LUOobCommand(NoHooksLU):
3808
  """Logical unit for OOB handling.
3809

3810
  """
3811
  REQ_BGL = False
3812
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3813

    
3814
  def ExpandNames(self):
3815
    """Gather locks we need.
3816

3817
    """
3818
    if self.op.node_names:
3819
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3820
      lock_names = self.op.node_names
3821
    else:
3822
      lock_names = locking.ALL_SET
3823

    
3824
    self.needed_locks = {
3825
      locking.LEVEL_NODE: lock_names,
3826
      }
3827

    
3828
  def CheckPrereq(self):
3829
    """Check prerequisites.
3830

3831
    This checks:
3832
     - the node exists in the configuration
3833
     - OOB is supported
3834

3835
    Any errors are signaled by raising errors.OpPrereqError.
3836

3837
    """
3838
    self.nodes = []
3839
    self.master_node = self.cfg.GetMasterNode()
3840

    
3841
    assert self.op.power_delay >= 0.0
3842

    
3843
    if self.op.node_names:
3844
      if (self.op.command in self._SKIP_MASTER and
3845
          self.master_node in self.op.node_names):
3846
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3847
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3848

    
3849
        if master_oob_handler:
3850
          additional_text = ("run '%s %s %s' if you want to operate on the"
3851
                             " master regardless") % (master_oob_handler,
3852
                                                      self.op.command,
3853
                                                      self.master_node)
3854
        else:
3855
          additional_text = "it does not support out-of-band operations"
3856

    
3857
        raise errors.OpPrereqError(("Operating on the master node %s is not"
3858
                                    " allowed for %s; %s") %
3859
                                   (self.master_node, self.op.command,
3860
                                    additional_text), errors.ECODE_INVAL)
3861
    else:
3862
      self.op.node_names = self.cfg.GetNodeList()
3863
      if self.op.command in self._SKIP_MASTER:
3864
        self.op.node_names.remove(self.master_node)
3865

    
3866
    if self.op.command in self._SKIP_MASTER:
3867
      assert self.master_node not in self.op.node_names
3868

    
3869
    for node_name in self.op.node_names:
3870
      node = self.cfg.GetNodeInfo(node_name)
3871

    
3872
      if node is None:
3873
        raise errors.OpPrereqError("Node %s not found" % node_name,
3874
                                   errors.ECODE_NOENT)
3875
      else:
3876
        self.nodes.append(node)
3877

    
3878
      if (not self.op.ignore_status and
3879
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3880
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3881
                                    " not marked offline") % node_name,
3882
                                   errors.ECODE_STATE)
3883

    
3884
  def Exec(self, feedback_fn):
3885
    """Execute OOB and return result if we expect any.
3886

3887
    """
3888
    master_node = self.master_node
3889
    ret = []
3890

    
3891
    for idx, node in enumerate(utils.NiceSort(self.nodes,
3892
                                              key=lambda node: node.name)):
3893
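      # each result row starts with the node name; the status of the OOB
      # command for that node is appended below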
      node_entry = [(constants.RS_NORMAL, node.name)]
3894
      ret.append(node_entry)
3895

    
3896
      oob_program = _SupportsOob(self.cfg, node)
3897

    
3898
      if not oob_program:
3899
        node_entry.append((constants.RS_UNAVAIL, None))
3900
        continue
3901

    
3902
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3903
                   self.op.command, oob_program, node.name)
3904
      result = self.rpc.call_run_oob(master_node, oob_program,
3905
                                     self.op.command, node.name,
3906
                                     self.op.timeout)
3907

    
3908
      if result.fail_msg:
3909
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3910
                        node.name, result.fail_msg)
3911
        node_entry.append((constants.RS_NODATA, None))
3912
      else:
3913
        try:
3914
          self._CheckPayload(result)
3915
        except errors.OpExecError, err:
3916
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
3917
                          node.name, err)
3918
          node_entry.append((constants.RS_NODATA, None))
3919
        else:
3920
          if self.op.command == constants.OOB_HEALTH:
3921
            # For health we should log important events
3922
            for item, status in result.payload:
3923
              if status in [constants.OOB_STATUS_WARNING,
3924
                            constants.OOB_STATUS_CRITICAL]:
3925
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
3926
                                item, node.name, status)
3927

    
3928
          if self.op.command == constants.OOB_POWER_ON:
3929
            node.powered = True
3930
          elif self.op.command == constants.OOB_POWER_OFF:
3931
            node.powered = False
3932
          elif self.op.command == constants.OOB_POWER_STATUS:
3933
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3934
            if powered != node.powered:
3935
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3936
                               " match actual power state (%s)"), node.powered,
3937
                              node.name, powered)
3938

    
3939
          # For configuration changing commands we should update the node
3940
          if self.op.command in (constants.OOB_POWER_ON,
3941
                                 constants.OOB_POWER_OFF):
3942
            self.cfg.Update(node, feedback_fn)
3943

    
3944
          node_entry.append((constants.RS_NORMAL, result.payload))
3945

    
3946
          if (self.op.command == constants.OOB_POWER_ON and
3947
              idx < len(self.nodes) - 1):
3948
            time.sleep(self.op.power_delay)
3949

    
3950
    return ret
3951

    
3952
  def _CheckPayload(self, result):
3953
    """Checks if the payload is valid.
3954

3955
    @param result: RPC result
3956
    @raises errors.OpExecError: If payload is not valid
3957

3958
    """
3959
    errs = []
3960
    if self.op.command == constants.OOB_HEALTH:
3961
      if not isinstance(result.payload, list):
3962
        errs.append("command 'health' is expected to return a list but got %s" %
3963
                    type(result.payload))
3964
      else:
3965
        for item, status in result.payload:
3966
          if status not in constants.OOB_STATUSES:
3967
            errs.append("health item '%s' has invalid status '%s'" %
3968
                        (item, status))
3969

    
3970
    if self.op.command == constants.OOB_POWER_STATUS:
3971
      if not isinstance(result.payload, dict):
3972
        errs.append("power-status is expected to return a dict but got %s" %
3973
                    type(result.payload))
3974

    
3975
    if self.op.command in [
3976
        constants.OOB_POWER_ON,
3977
        constants.OOB_POWER_OFF,
3978
        constants.OOB_POWER_CYCLE,
3979
        ]:
3980
      if result.payload is not None:
3981
        errs.append("%s is expected to not return payload but got '%s'" %
3982
                    (self.op.command, result.payload))
3983

    
3984
    if errs:
3985
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3986
                               utils.CommaJoin(errs))
3987

    
3988
class _OsQuery(_QueryBase):
3989
  FIELDS = query.OS_FIELDS
3990

    
3991
  def ExpandNames(self, lu):
3992
    # Lock all nodes in shared mode
3993
    # Temporary removal of locks, should be reverted later
3994
    # TODO: reintroduce locks when they are lighter-weight
3995
    lu.needed_locks = {}
3996
    #self.share_locks[locking.LEVEL_NODE] = 1
3997
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3998

    
3999
    # The following variables interact with _QueryBase._GetNames
4000
    if self.names:
4001
      self.wanted = self.names
4002
    else:
4003
      self.wanted = locking.ALL_SET
4004

    
4005
    self.do_locking = self.use_locking
4006

    
4007
  def DeclareLocks(self, lu, level):
4008
    pass
4009

    
4010
  @staticmethod
4011
  def _DiagnoseByOS(rlist):
4012
    """Remaps a per-node return list into an a per-os per-node dictionary
4013

4014
    @param rlist: a map with node names as keys and OS objects as values
4015

4016
    @rtype: dict
4017
    @return: a dictionary with osnames as keys and as value another
4018
        map, with nodes as keys and tuples of (path, status, diagnose,
4019
        variants, parameters, api_versions) as values, eg::
4020

4021
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4022
                                     (/srv/..., False, "invalid api")],
4023
                           "node2": [(/srv/..., True, "", [], [])]}
4024
          }
4025

4026
    """
4027
    all_os = {}
4028
    # we build here the list of nodes that didn't fail the RPC (at RPC
4029
    # level), so that nodes with a non-responding node daemon don't
4030
    # make all OSes invalid
4031
    good_nodes = [node_name for node_name in rlist
4032
                  if not rlist[node_name].fail_msg]
4033
    for node_name, nr in rlist.items():
4034
      if nr.fail_msg or not nr.payload:
4035
        continue
4036
      for (name, path, status, diagnose, variants,
4037
           params, api_versions) in nr.payload:
4038
        if name not in all_os:
4039
          # build a list of nodes for this os containing empty lists
4040
          # for each node in node_list
4041
          all_os[name] = {}
4042
          for nname in good_nodes:
4043
            all_os[name][nname] = []
4044
        # convert params from [name, help] to (name, help)
4045
        params = [tuple(v) for v in params]
4046
        all_os[name][node_name].append((path, status, diagnose,
4047
                                        variants, params, api_versions))
4048
    return all_os
4049

    
4050
  def _GetQueryData(self, lu):
4051
    """Computes the list of nodes and their attributes.
4052

4053
    """
4054
    # Locking is not used
4055
    assert not (compat.any(lu.glm.is_owned(level)
4056
                           for level in locking.LEVELS
4057
                           if level != locking.LEVEL_CLUSTER) or
4058
                self.do_locking or self.use_locking)
4059

    
4060
    valid_nodes = [node.name
4061
                   for node in lu.cfg.GetAllNodesInfo().values()
4062
                   if not node.offline and node.vm_capable]
4063
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4064
    cluster = lu.cfg.GetClusterInfo()
4065

    
4066
    data = {}
4067

    
4068
    for (os_name, os_data) in pol.items():
4069
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4070
                          hidden=(os_name in cluster.hidden_os),
4071
                          blacklisted=(os_name in cluster.blacklisted_os))
4072

    
4073
      variants = set()
4074
      parameters = set()
4075
      api_versions = set()
4076

    
4077
      for idx, osl in enumerate(os_data.values()):
4078
        info.valid = bool(info.valid and osl and osl[0][1])
4079
        if not info.valid:
4080
          break
4081

    
4082
        (node_variants, node_params, node_api) = osl[0][3:6]
4083
        if idx == 0:
4084
          # First entry
4085
          variants.update(node_variants)
4086
          parameters.update(node_params)
4087
          api_versions.update(node_api)
4088
        else:
4089
          # Filter out inconsistent values
4090
          variants.intersection_update(node_variants)
4091
          parameters.intersection_update(node_params)
4092
          api_versions.intersection_update(node_api)
4093

    
4094
      info.variants = list(variants)
4095
      info.parameters = list(parameters)
4096
      info.api_versions = list(api_versions)
4097

    
4098
      data[os_name] = info
4099

    
4100
    # Prepare data in requested order
4101
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4102
            if name in data]
4103

    
4104

    
4105
class LUOsDiagnose(NoHooksLU):
4106
  """Logical unit for OS diagnose/query.
4107

4108
  """
4109
  REQ_BGL = False
4110

    
4111
  @staticmethod
4112
  def _BuildFilter(fields, names):
4113
    """Builds a filter for querying OSes.
4114

4115
    """
4116
    name_filter = qlang.MakeSimpleFilter("name", names)
4117

    
4118
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4119
    # respective field is not requested
4120
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4121
                     for fname in ["hidden", "blacklisted"]
4122
                     if fname not in fields]
4123
    if "valid" not in fields:
4124
      status_filter.append([qlang.OP_TRUE, "valid"])
4125

    
4126
    if status_filter:
4127
      status_filter.insert(0, qlang.OP_AND)
4128
    else:
4129
      status_filter = None
4130

    
4131
    if name_filter and status_filter:
4132
      return [qlang.OP_AND, name_filter, status_filter]
4133
    elif name_filter:
4134
      return name_filter
4135
    else:
4136
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)

    
4337

    
4338
class LUNodeQueryvols(NoHooksLU):
4339
  """Logical unit for getting volumes on node(s).
4340

4341
  """
4342
  REQ_BGL = False
4343
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4344
  _FIELDS_STATIC = utils.FieldSet("node")
4345

    
4346
  def CheckArguments(self):
4347
    _CheckOutputFields(static=self._FIELDS_STATIC,
4348
                       dynamic=self._FIELDS_DYNAMIC,
4349
                       selected=self.op.output_fields)
4350

    
4351
  def ExpandNames(self):
4352
    self.needed_locks = {}
4353
    self.share_locks[locking.LEVEL_NODE] = 1
4354
    if not self.op.nodes:
4355
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4356
    else:
4357
      self.needed_locks[locking.LEVEL_NODE] = \
4358
        _GetWantedNodes(self, self.op.nodes)
4359

    
4360
  def Exec(self, feedback_fn):
4361
    """Computes the list of nodes and their attributes.
4362

4363
    """
4364
    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
4365
    volumes = self.rpc.call_node_volumes(nodenames)
4366

    
4367
    ilist = self.cfg.GetAllInstancesInfo()
4368
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4369

    
4370
    output = []
4371
    for node in nodenames:
4372
      nresult = volumes[node]
4373
      if nresult.offline:
4374
        continue
4375
      msg = nresult.fail_msg
4376
      if msg:
4377
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4378
        continue
4379

    
4380
      node_vols = sorted(nresult.payload,
4381
                         key=operator.itemgetter("dev"))
4382

    
4383
      for vol in node_vols:
4384
        node_output = []
4385
        for field in self.op.output_fields:
4386
          if field == "node":
4387
            val = node
4388
          elif field == "phys":
4389
            val = vol["dev"]
4390
          elif field == "vg":
4391
            val = vol["vg"]
4392
          elif field == "name":
4393
            val = vol["name"]
4394
          elif field == "size":
4395
            val = int(float(vol["size"]))
4396
          elif field == "instance":
4397
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4398
          else:
4399
            raise errors.ParameterError(field)
4400
          node_output.append(str(val))
4401

    
4402
        output.append(node_output)
4403

    
4404
    return output
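    # Editor's sketch (illustrative only): with
    # output_fields=["node", "phys", "vg", "name", "size", "instance"] a
    # single returned row could look like
    #   ["node1.example.com", "/dev/sda5", "xenvg", "disk0", "10240", "inst1"]
    # where the hostname, volume group and instance names are hypothetical.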


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result

class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo)

class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, False)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies the given storage unit on the node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))

class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())

class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]
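  # Editor's note (illustrative, not part of the original code): _F2R maps the
  # (master_candidate, drained, offline) flag tuple to a role, e.g.
  #   _F2R[(True, False, False)]  == _ROLE_CANDIDATE
  #   _F2R[(False, False, False)] == _ROLE_REGULAR
  # and _R2F is simply the inverse mapping, used in Exec() to write the new
  # flag tuple back onto the node object.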

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result

class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result
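    # Editor's note (illustrative, values hypothetical): this dictionary is
    # what "gnt-cluster info" presents to the user, e.g. a small excerpt:
    #   {"name": "cluster.example.com", "master": "node1.example.com",
    #    "enabled_hypervisors": ["kvm"], "candidate_pool_size": 10, ...}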


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking has occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
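  # Editor's sketch (illustrative): for a healthy single-disk DRBD instance
  # the function returns something like
  #   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
  # (hostname and device path hypothetical), while a failed assembly on the
  # primary yields disks_ok == False and a None device path for that disk.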
5503

    
5504

    
5505
def _StartInstanceDisks(lu, instance, force):
5506
  """Start the disks of an instance.
5507

5508
  """
5509
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5510
                                           ignore_secondaries=force)
5511
  if not disks_ok:
5512
    _ShutdownInstanceDisks(lu, instance)
5513
    if force is not None and not force:
5514
      lu.proc.LogWarning("", hint="If the message above refers to a"
5515
                         " secondary node,"
5516
                         " you can retry the operation using '--force'.")
5517
    raise errors.OpExecError("Disk consistency error")
5518

    
5519

    
5520
class LUInstanceDeactivateDisks(NoHooksLU):
5521
  """Shutdown an instance's disks.
5522

5523
  """
5524
  REQ_BGL = False
5525

    
5526
  def ExpandNames(self):
5527
    self._ExpandAndLockInstance()
5528
    self.needed_locks[locking.LEVEL_NODE] = []
5529
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5530

    
5531
  def DeclareLocks(self, level):
5532
    if level == locking.LEVEL_NODE:
5533
      self._LockInstancesNodes()
5534

    
5535
  def CheckPrereq(self):
5536
    """Check prerequisites.
5537

5538
    This checks that the instance is in the cluster.
5539

5540
    """
5541
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5542
    assert self.instance is not None, \
5543
      "Cannot retrieve locked instance %s" % self.op.instance_name
5544

    
5545
  def Exec(self, feedback_fn):
5546
    """Deactivate the disks
5547

5548
    """
5549
    instance = self.instance
5550
    if self.op.force:
5551
      _ShutdownInstanceDisks(self, instance)
5552
    else:
5553
      _SafeShutdownInstanceDisks(self, instance)
5554

    
5555

    
5556
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5557
  """Shutdown block devices of an instance.
5558

5559
  This function checks if an instance is running, before calling
5560
  _ShutdownInstanceDisks.
5561

5562
  """
5563
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5564
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5565

    
5566

    
5567
def _ExpandCheckDisks(instance, disks):
5568
  """Return the instance disks selected by the disks list
5569

5570
  @type disks: list of L{objects.Disk} or None
5571
  @param disks: selected disks
5572
  @rtype: list of L{objects.Disk}
5573
  @return: selected instance disks to act on
5574

5575
  """
5576
  if disks is None:
5577
    return instance.disks
5578
  else:
5579
    if not set(disks).issubset(instance.disks):
5580
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5581
                                   " target instance")
5582
    return disks
5583

    
5584

    
5585
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
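        # A shutdown failure only counts against the overall result when it
        # happens on the primary node (unless ignore_primary is set) or on a
        # secondary node that is not marked offline.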
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams,
                                            self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

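    # Soft/hard reboots of a running instance go through the hypervisor's own
    # reboot call; otherwise (full reboot, or the instance is down) we fall
    # back to a shutdown followed by a fresh disk activation and start.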
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # normalise the disk list
    self.op.disks = sorted(frozenset(self.op.disks))

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    to_skip = []
    mods = [] # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes and minors
          assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6 # otherwise disk internals
                                           # have changed
          (_, _, old_port, _, _, old_secret) = disk.logical_id
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                    new_minors[0], new_minors[1], old_secret)
          assert len(disk.logical_id) == len(new_id)
          mods.append((idx, new_id))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id in mods:
      instance.disks[idx].logical_id = new_id

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (hostname.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

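    # Copy strategy: assemble each new disk on the target node to obtain a
    # device path, then have the source node export its data into that path
    # over the network; the first failure aborts the loop and triggers a
    # rollback of the disks created on the target.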
    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare jobs for migration instances
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
      ]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)


class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we clean up from a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown

  """
  def __init__(self, lu, instance_name, cleanup=False,
               failover=False, fallback=False,
               ignore_consistency=False,
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance

    if (not self.cleanup and not instance.admin_up and not self.failover and
        self.fallback):
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
                      " to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node))

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])

    else:
      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    # check memory requirements on the secondary node
    if not self.failover or instance.admin_up:
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                           instance.name, i_be[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
                                                skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.instance_name,
                     # TODO See why hail breaks with a single node below
                     relocate_from=[self.instance.primary_node,
                                    self.instance.primary_node],
                     )

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.lu.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.instance_name, self.lu.op.iallocator,
                 utils.CommaJoin(ial.result))

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore errors here, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_up:
      self.feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, dev.iv_name, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % dev.iv_name)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, instance, None, None,
                                            False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


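# Summary of the helper below (descriptive only, no extra behaviour): it
# builds one DRBD8 disk object whose two children are the data LV of the
# requested size and a 128 MB metadata LV, and whose logical_id carries
# (primary, secondary, port, p_minor, s_minor, shared_secret).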
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


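# Sketch of the disk_info argument accepted by _GenerateDiskTemplate below
# (illustrative only; the authoritative key and mode values live in
# constants.py): each entry is a dict keyed by the IDISK_* constants, e.g.
#   [{constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw"},
#    {constants.IDISK_SIZE: 2048, constants.IDISK_MODE: "rw",
#     constants.IDISK_VG: "xenvg"}]
# which would yield the "disk/0" and "disk/1" devices generated below.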
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get(constants.IDISK_VG, vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk[constants.IDISK_ADOPT]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


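# Worked example for _CalcEta below (made-up numbers): writing 256 MiB of a
# 1024 MiB total in 30 seconds gives an average of 30/256 s per MiB, so the
# remaining 768 MiB yield an ETA of roughly 90 seconds.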
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time


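# Note on _WipeDisks below: each wipe request covers MIN_WIPE_CHUNK_PERCENT
# of the disk, capped at MAX_WIPE_CHUNK.  As a rough sketch (assuming the
# usual 10% / 1024 MB values of those constants), a 4 GB disk is wiped in
# ~409 MB chunks while very large disks always use 1024 MB chunks.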
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disks %d failed",
                     instance.name, idx)


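# In _CreateDisks below, every disk tree is visited on all of the instance's
# nodes, but force_create/force_open are only set on the primary node
# (f_create = node == pnode); on secondaries only the sub-devices that report
# CreateOnSecondary() (e.g. the LVs backing DRBD) are actually created.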
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + \
        payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


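# Illustration for _ComputeDiskSize below (made-up sizes): two DRBD8 disks of
# 10240 MB and 2048 MB require 10240 + 128 + 2048 + 128 = 12544 MB, while the
# same disks as plain LVs need just 10240 + 2048 = 12288 MB.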
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in vm_nodes]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUInstanceCreate(LogicalUnit):
7924
  """Create an instance.
7925

7926
  """
7927
  HPATH = "instance-add"
7928
  HTYPE = constants.HTYPE_INSTANCE
7929
  REQ_BGL = False
7930

    
7931
  def CheckArguments(self):
7932
    """Check arguments.
7933

7934
    """
7935
    # do not require name_check to ease forward/backward compatibility
7936
    # for tools
7937
    if self.op.no_install and self.op.start:
7938
      self.LogInfo("No-installation mode selected, disabling startup")
7939
      self.op.start = False
7940
    # validate/normalize the instance name
7941
    self.op.instance_name = \
7942
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7943

    
7944
    if self.op.ip_check and not self.op.name_check:
7945
      # TODO: make the ip check more flexible and not depend on the name check
7946
      raise errors.OpPrereqError("Cannot do IP address check without a name"
7947
                                 " check", errors.ECODE_INVAL)
7948

    
7949
    # check nics' parameter names
7950
    for nic in self.op.nics:
7951
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7952

    
7953
    # check disks. parameter names and consistent adopt/no-adopt strategy
7954
    has_adopt = has_no_adopt = False
7955
    for disk in self.op.disks:
7956
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7957
      if constants.IDISK_ADOPT in disk:
7958
        has_adopt = True
7959
      else:
7960
        has_no_adopt = True
7961
    if has_adopt and has_no_adopt:
7962
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7963
                                 errors.ECODE_INVAL)
7964
    if has_adopt:
7965
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7966
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7967
                                   " '%s' disk template" %
7968
                                   self.op.disk_template,
7969
                                   errors.ECODE_INVAL)
7970
      if self.op.iallocator is not None:
7971
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7972
                                   " iallocator script", errors.ECODE_INVAL)
7973
      if self.op.mode == constants.INSTANCE_IMPORT:
7974
        raise errors.OpPrereqError("Disk adoption not allowed for"
7975
                                   " instance import", errors.ECODE_INVAL)
7976
    else:
7977
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
7978
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7979
                                   " but no 'adopt' parameter given" %
7980
                                   self.op.disk_template,
7981
                                   errors.ECODE_INVAL)
7982

    
7983
    self.adopt_disks = has_adopt
7984

    
7985
    # instance name verification
7986
    if self.op.name_check:
7987
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7988
      self.op.instance_name = self.hostname1.name
7989
      # used in CheckPrereq for ip ping check
7990
      self.check_ip = self.hostname1.ip
7991
    else:
7992
      self.check_ip = None
7993

    
7994
    # file storage checks
7995
    if (self.op.file_driver and
7996
        not self.op.file_driver in constants.FILE_DRIVER):
7997
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
7998
                                 self.op.file_driver, errors.ECODE_INVAL)
7999

    
8000
    if self.op.disk_template == constants.DT_FILE:
8001
      opcodes.RequireFileStorage()
8002
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8003
      opcodes.RequireSharedFileStorage()
8004

    
8005
    ### Node/iallocator related checks
8006
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8007

    
8008
    if self.op.pnode is not None:
8009
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8010
        if self.op.snode is None:
8011
          raise errors.OpPrereqError("The networked disk templates need"
8012
                                     " a mirror node", errors.ECODE_INVAL)
8013
      elif self.op.snode:
8014
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8015
                        " template")
8016
        self.op.snode = None
8017

    
8018
    self._cds = _GetClusterDomainSecret()
8019

    
8020
    if self.op.mode == constants.INSTANCE_IMPORT:
8021
      # On import force_variant must be True, because if we forced it at
8022
      # initial install, our only chance when importing it back is that it
8023
      # works again!
8024
      self.op.force_variant = True
8025

    
8026
      if self.op.no_install:
8027
        self.LogInfo("No-installation mode has no effect during import")
8028

    
8029
    elif self.op.mode == constants.INSTANCE_CREATE:
8030
      if self.op.os_type is None:
8031
        raise errors.OpPrereqError("No guest OS specified",
8032
                                   errors.ECODE_INVAL)
8033
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8034
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8035
                                   " installation" % self.op.os_type,
8036
                                   errors.ECODE_STATE)
8037
      if self.op.disk_template is None:
8038
        raise errors.OpPrereqError("No disk template specified",
8039
                                   errors.ECODE_INVAL)
8040

    
8041
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8042
      # Check handshake to ensure both clusters have the same domain secret
8043
      src_handshake = self.op.source_handshake
8044
      if not src_handshake:
8045
        raise errors.OpPrereqError("Missing source handshake",
8046
                                   errors.ECODE_INVAL)
8047

    
8048
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8049
                                                           src_handshake)
8050
      if errmsg:
8051
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8052
                                   errors.ECODE_INVAL)
8053

    
8054
      # Load and check source CA
8055
      self.source_x509_ca_pem = self.op.source_x509_ca
8056
      if not self.source_x509_ca_pem:
8057
        raise errors.OpPrereqError("Missing source X509 CA",
8058
                                   errors.ECODE_INVAL)
8059

    
8060
      try:
8061
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8062
                                                    self._cds)
8063
      except OpenSSL.crypto.Error, err:
8064
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8065
                                   (err, ), errors.ECODE_INVAL)
8066

    
8067
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8068
      if errcode is not None:
8069
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8070
                                   errors.ECODE_INVAL)
8071

    
8072
      self.source_x509_ca = cert
8073

    
8074
      src_instance_name = self.op.source_instance_name
8075
      if not src_instance_name:
8076
        raise errors.OpPrereqError("Missing source instance name",
8077
                                   errors.ECODE_INVAL)
8078

    
8079
      self.source_instance_name = \
8080
          netutils.GetHostname(name=src_instance_name).name
8081

    
8082
    else:
8083
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8084
                                 self.op.mode, errors.ECODE_INVAL)
8085

    
8086
  def ExpandNames(self):
8087
    """ExpandNames for CreateInstance.
8088

8089
    Figure out the right locks for instance creation.
8090

8091
    """
8092
    self.needed_locks = {}
8093

    
8094
    instance_name = self.op.instance_name
8095
    # this is just a preventive check, but someone might still add this
8096
    # instance in the meantime, and creation will fail at lock-add time
8097
    if instance_name in self.cfg.GetInstanceList():
8098
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8099
                                 instance_name, errors.ECODE_EXISTS)
8100

    
8101
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8102

    
8103
    if self.op.iallocator:
8104
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8105
    else:
8106
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8107
      nodelist = [self.op.pnode]
8108
      if self.op.snode is not None:
8109
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8110
        nodelist.append(self.op.snode)
8111
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8112

    
8113
    # in case of import lock the source node too
8114
    if self.op.mode == constants.INSTANCE_IMPORT:
8115
      src_node = self.op.src_node
8116
      src_path = self.op.src_path
8117

    
8118
      if src_path is None:
8119
        self.op.src_path = src_path = self.op.instance_name
8120

    
8121
      if src_node is None:
8122
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8123
        self.op.src_node = None
8124
        if os.path.isabs(src_path):
8125
          raise errors.OpPrereqError("Importing an instance from an absolute"
8126
                                     " path requires a source node option",
8127
                                     errors.ECODE_INVAL)
8128
      else:
8129
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8130
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8131
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8132
        if not os.path.isabs(src_path):
8133
          self.op.src_path = src_path = \
8134
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8135

    
8136
  def _RunAllocator(self):
8137
    """Run the allocator based on input opcode.
8138

8139
    """
8140
    nics = [n.ToDict() for n in self.nics]
8141
    ial = IAllocator(self.cfg, self.rpc,
8142
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8143
                     name=self.op.instance_name,
8144
                     disk_template=self.op.disk_template,
8145
                     tags=self.op.tags,
8146
                     os=self.op.os_type,
8147
                     vcpus=self.be_full[constants.BE_VCPUS],
8148
                     memory=self.be_full[constants.BE_MEMORY],
8149
                     disks=self.disks,
8150
                     nics=nics,
8151
                     hypervisor=self.op.hypervisor,
8152
                     )
8153

    
8154
    ial.Run(self.op.iallocator)
8155

    
8156
    if not ial.success:
8157
      raise errors.OpPrereqError("Can't compute nodes using"
8158
                                 " iallocator '%s': %s" %
8159
                                 (self.op.iallocator, ial.info),
8160
                                 errors.ECODE_NORES)
8161
    if len(ial.result) != ial.required_nodes:
8162
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8163
                                 " of nodes (%s), required %s" %
8164
                                 (self.op.iallocator, len(ial.result),
8165
                                  ial.required_nodes), errors.ECODE_FAULT)
8166
    self.op.pnode = ial.result[0]
8167
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8168
                 self.op.instance_name, self.op.iallocator,
8169
                 utils.CommaJoin(ial.result))
8170
    if ial.required_nodes == 2:
8171
      self.op.snode = ial.result[1]
8172

    
8173
  def BuildHooksEnv(self):
8174
    """Build hooks env.
8175

8176
    This runs on master, primary and secondary nodes of the instance.
8177

8178
    """
8179
    env = {
8180
      "ADD_MODE": self.op.mode,
8181
      }
8182
    if self.op.mode == constants.INSTANCE_IMPORT:
8183
      env["SRC_NODE"] = self.op.src_node
8184
      env["SRC_PATH"] = self.op.src_path
8185
      env["SRC_IMAGES"] = self.src_images
8186

    
8187
    env.update(_BuildInstanceHookEnv(
8188
      name=self.op.instance_name,
8189
      primary_node=self.op.pnode,
8190
      secondary_nodes=self.secondaries,
8191
      status=self.op.start,
8192
      os_type=self.op.os_type,
8193
      memory=self.be_full[constants.BE_MEMORY],
8194
      vcpus=self.be_full[constants.BE_VCPUS],
8195
      nics=_NICListToTuple(self, self.nics),
8196
      disk_template=self.op.disk_template,
8197
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8198
             for d in self.disks],
8199
      bep=self.be_full,
8200
      hvp=self.hv_full,
8201
      hypervisor_name=self.op.hypervisor,
8202
      tags=self.op.tags,
8203
    ))
8204

    
8205
    return env
8206

    
8207
  def BuildHooksNodes(self):
8208
    """Build hooks nodes.
8209

8210
    """
8211
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8212
    return nl, nl
8213

    
8214
  def _ReadExportInfo(self):
8215
    """Reads the export information from disk.
8216

8217
    It will override the opcode source node and path with the actual
8218
    information, if these two were not specified before.
8219

8220
    @return: the export information
8221

8222
    """
8223
    assert self.op.mode == constants.INSTANCE_IMPORT
8224

    
8225
    src_node = self.op.src_node
8226
    src_path = self.op.src_path
8227

    
8228
    if src_node is None:
8229
      locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8230
      exp_list = self.rpc.call_export_list(locked_nodes)
8231
      found = False
8232
      for node in exp_list:
8233
        if exp_list[node].fail_msg:
8234
          continue
8235
        if src_path in exp_list[node].payload:
8236
          found = True
8237
          self.op.src_node = src_node = node
8238
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8239
                                                       src_path)
8240
          break
8241
      if not found:
8242
        raise errors.OpPrereqError("No export found for relative path %s" %
8243
                                    src_path, errors.ECODE_INVAL)
8244

    
8245
    _CheckNodeOnline(self, src_node)
8246
    result = self.rpc.call_export_info(src_node, src_path)
8247
    result.Raise("No export or invalid export found in dir %s" % src_path)
8248

    
8249
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8250
    if not export_info.has_section(constants.INISECT_EXP):
8251
      raise errors.ProgrammerError("Corrupted export config",
8252
                                   errors.ECODE_ENVIRON)
8253

    
8254
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8255
    if (int(ei_version) != constants.EXPORT_VERSION):
8256
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8257
                                 (ei_version, constants.EXPORT_VERSION),
8258
                                 errors.ECODE_ENVIRON)
8259
    return export_info
8260

    
8261
  def _ReadExportParams(self, einfo):
8262
    """Use export parameters as defaults.
8263

8264
    In case the opcode doesn't specify (as in override) some instance
8265
    parameters, then try to use them from the export information, if
8266
    that declares them.
8267

8268
    """
8269
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
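    # Dropping values that are identical to the current cluster defaults
    # means the instance keeps following those defaults if they change
    # later, instead of pinning today's values.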
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable-msg=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

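    # Hypervisors in HTS_REQ_PORT need a cluster-wide unique TCP port
    # (used for the remote console/display, e.g. VNC); reserve one here.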
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

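    # Freshly created disks are wiped only when the cluster-level
    # prealloc_wipe_disks option is set; adopted disks keep their data.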
    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj,
                                            None, None, False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance.

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
    if owned_groups:
      groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
      if owned_groups != groups:
        raise errors.OpExecError("Node groups used by instance '%s' changed"
                                 " since lock was acquired, current list is %r,"
                                 " used to be '%s'" %
                                 (self.op.instance_name,
                                  utils.CommaJoin(groups),
                                  utils.CommaJoin(owned_groups)))

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

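        # A node marked offline cannot answer the blockdev_find RPC; skip it
        # instead of treating its disks as inactive.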
        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = \
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
           for node_name in touched_nodes)

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
      assert set(owned_locks) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_locks, self.node_secondary_ip.keys()))

      owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
      assert list(owned_locks) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    if __debug__:
      # Verify owned locks
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_locks) or
              (not self.early_release and not (set(owned_locks) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_locks, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

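      # Build the replacement LVs in the same volume groups as the existing
      # DRBD children: a data LV of the disk's size plus a small metadata LV
      # (128 MiB, the size used for DRBD8 metadata at creation time).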
      vg_data = dev.children[0].logical_id[0]
9487
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9488
                             logical_id=(vg_data, names[0]))
9489
      vg_meta = dev.children[1].logical_id[0]
9490
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9491
                             logical_id=(vg_meta, names[1]))
9492

    
9493
      new_lvs = [lv_data, lv_meta]
9494
      old_lvs = [child.Copy() for child in dev.children]
9495
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9496

    
9497
      # we pass force_create=True to force the LVM creation
9498
      for new_lv in new_lvs:
9499
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9500
                        _GetInstanceInfoText(self.instance), False)
9501

    
9502
    return iv_names
9503

    
9504
  def _CheckDevices(self, node_name, iv_names):
9505
    for name, (dev, _, _) in iv_names.iteritems():
9506
      self.cfg.SetDiskID(dev, node_name)
9507

    
9508
      result = self.rpc.call_blockdev_find(node_name, dev)
9509

    
9510
      msg = result.fail_msg
9511
      if msg or not result.payload:
9512
        if not msg:
9513
          msg = "disk not found"
9514
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9515
                                 (name, msg))
9516

    
9517
      if result.payload.is_degraded:
9518
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9519

    
9520
  def _RemoveOldStorage(self, node_name, iv_names):
9521
    for name, (_, old_lvs, _) in iv_names.iteritems():
9522
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9523

    
9524
      for lv in old_lvs:
9525
        self.cfg.SetDiskID(lv, node_name)
9526

    
9527
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9528
        if msg:
9529
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9530
                             hint="remove unused LVs manually")
9531

    
9532
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9533
    """Replace a disk on the primary or secondary for DRBD 8.
9534

9535
    The algorithm for replace is quite complicated:
9536

9537
      1. for each disk to be replaced:
9538

9539
        1. create new LVs on the target node with unique names
9540
        1. detach old LVs from the drbd device
9541
        1. rename old LVs to name_replaced.<time_t>
9542
        1. rename new LVs to old LVs
9543
        1. attach the new LVs (with the old names now) to the drbd device
9544

9545
      1. wait for sync across all devices
9546

9547
      1. for each modified disk:
9548

9549
        1. remove old LVs (which have the name name_replaces.<time_t>)
9550

9551
    Failures are not very well handled.
9552

9553
    """
9554
    steps_total = 6
9555

    
9556
    # Step: check device activation
9557
    self.lu.LogStep(1, steps_total, "Check device existence")
9558
    self._CheckDisksExistence([self.other_node, self.target_node])
9559
    self._CheckVolumeGroup([self.target_node, self.other_node])
9560

    
9561
    # Step: check other node consistency
9562
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9563
    self._CheckDisksConsistency(self.other_node,
9564
                                self.other_node == self.instance.primary_node,
9565
                                False)
9566

    
9567
    # Step: create new storage
9568
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9569
    iv_names = self._CreateNewStorage(self.target_node)
9570

    
9571
    # Step: for each lv, detach+rename*2+attach
9572
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9573
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9574
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9575

    
9576
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9577
                                                     old_lvs)
9578
      result.Raise("Can't detach drbd from local storage on node"
9579
                   " %s for device %s" % (self.target_node, dev.iv_name))
9580
      #dev.children = []
9581
      #cfg.Update(instance)
9582

    
9583
      # ok, we created the new LVs, so now we know we have the needed
9584
      # storage; as such, we proceed on the target node to rename
9585
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9586
      # using the assumption that logical_id == physical_id (which in
9587
      # turn is the unique_id on that node)
9588

    
9589
      # FIXME(iustin): use a better name for the replaced LVs
9590
      temp_suffix = int(time.time())
9591
      ren_fn = lambda d, suff: (d.physical_id[0],
9592
                                d.physical_id[1] + "_replaced-%s" % suff)
9593

    
9594
      # Build the rename list based on what LVs exist on the node
9595
      rename_old_to_new = []
9596
      for to_ren in old_lvs:
9597
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9598
        if not result.fail_msg and result.payload:
9599
          # device exists
9600
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9601

    
9602
      self.lu.LogInfo("Renaming the old LVs on the target node")
9603
      result = self.rpc.call_blockdev_rename(self.target_node,
9604
                                             rename_old_to_new)
9605
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9606

    
9607
      # Now we rename the new LVs to the old LVs
9608
      self.lu.LogInfo("Renaming the new LVs on the target node")
9609
      rename_new_to_old = [(new, old.physical_id)
9610
                           for old, new in zip(old_lvs, new_lvs)]
9611
      result = self.rpc.call_blockdev_rename(self.target_node,
9612
                                             rename_new_to_old)
9613
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9614

    
9615
      # Intermediate steps of in memory modifications
9616
      for old, new in zip(old_lvs, new_lvs):
9617
        new.logical_id = old.logical_id
9618
        self.cfg.SetDiskID(new, self.target_node)
9619

    
9620
      # We need to modify old_lvs so that removal later removes the
9621
      # right LVs, not the newly added ones; note that old_lvs is a
9622
      # copy here
9623
      for disk in old_lvs:
9624
        disk.logical_id = ren_fn(disk, temp_suffix)
9625
        self.cfg.SetDiskID(disk, self.target_node)
9626

    
9627
      # Now that the new lvs have the old name, we can add them to the device
9628
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9629
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9630
                                                  new_lvs)
9631
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("manually clean up the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9641

    
9642
    cstep = 5
9643
    if self.early_release:
9644
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9645
      cstep += 1
9646
      self._RemoveOldStorage(self.target_node, iv_names)
9647
      # WARNING: we release both node locks here, do not do other RPCs
9648
      # than WaitForSync to the primary node
9649
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9650
                    names=[self.target_node, self.other_node])
9651

    
9652
    # Wait for sync
9653
    # This can fail as the old devices are degraded and _WaitForSync
9654
    # does a combined result over all disks, so we don't check its return value
9655
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9656
    cstep += 1
9657
    _WaitForSync(self.lu, self.instance)
9658

    
9659
    # Check all devices manually
9660
    self._CheckDevices(self.instance.primary_node, iv_names)
9661

    
9662
    # Step: remove old storage
9663
    if not self.early_release:
9664
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9665
      cstep += 1
9666
      self._RemoveOldStorage(self.target_node, iv_names)
9667

    
9668
  def _ExecDrbd8Secondary(self, feedback_fn):
9669
    """Replace the secondary node for DRBD 8.
9670

9671
    The algorithm for replace is quite complicated:
9672
      - for all disks of the instance:
9673
        - create new LVs on the new node with same names
9674
        - shutdown the drbd device on the old secondary
9675
        - disconnect the drbd network on the primary
9676
        - create the drbd device on the new secondary
9677
        - network attach the drbd on the primary, using an artifice:
9678
          the drbd code for Attach() will connect to the network if it
9679
          finds a device which is connected to the good local disks but
9680
          not network enabled
9681
      - wait for sync across all devices
9682
      - remove all disks from the old secondary
9683

9684
    Failures are not very well handled.
9685

9686
    """
9687
    steps_total = 6
9688

    
9689
    # Step: check device activation
9690
    self.lu.LogStep(1, steps_total, "Check device existence")
9691
    self._CheckDisksExistence([self.instance.primary_node])
9692
    self._CheckVolumeGroup([self.instance.primary_node])
9693

    
9694
    # Step: check other node consistency
9695
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9696
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9697

    
9698
    # Step: create new storage
9699
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9700
    for idx, dev in enumerate(self.instance.disks):
9701
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9702
                      (self.new_node, idx))
9703
      # we pass force_create=True to force LVM creation
9704
      for new_lv in dev.children:
9705
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9706
                        _GetInstanceInfoText(self.instance), False)
9707

    
9708
    # Step 4: drbd minors and drbd setup changes
9709
    # after this, we must manually remove the drbd minors on both the
9710
    # error and the success paths
9711
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9712
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9713
                                         for dev in self.instance.disks],
9714
                                        self.instance.name)
9715
    logging.debug("Allocated minors %r", minors)
9716

    
9717
    iv_names = {}
9718
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9719
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9720
                      (self.new_node, idx))
9721
      # create new devices on new_node; note that we create two IDs:
9722
      # one without port, so the drbd will be activated without
9723
      # networking information on the new node at this stage, and one
9724
      # with network, for the latter activation in step 4
9725
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9726
      if self.instance.primary_node == o_node1:
9727
        p_minor = o_minor1
9728
      else:
9729
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9730
        p_minor = o_minor2
9731

    
9732
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9733
                      p_minor, new_minor, o_secret)
9734
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9735
                    p_minor, new_minor, o_secret)
9736

    
9737
      iv_names[idx] = (dev, dev.children, new_net_id)
9738
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9739
                    new_net_id)
9740
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9741
                              logical_id=new_alone_id,
9742
                              children=dev.children,
9743
                              size=dev.size)
9744
      try:
9745
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9746
                              _GetInstanceInfoText(self.instance), False)
9747
      except errors.GenericError:
9748
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9749
        raise
9750

    
9751
    # We have new devices, shutdown the drbd on the old secondary
9752
    for idx, dev in enumerate(self.instance.disks):
9753
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9754
      self.cfg.SetDiskID(dev, self.target_node)
9755
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9756
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please clean up this device manually as"
                                 " soon as possible"))
9761

    
9762
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9763
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9764
                                               self.node_secondary_ip,
9765
                                               self.instance.disks)\
9766
                                              [self.instance.primary_node]
9767

    
9768
    msg = result.fail_msg
9769
    if msg:
9770
      # detaches didn't succeed (unlikely)
9771
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9772
      raise errors.OpExecError("Can't detach the disks from the network on"
9773
                               " old node: %s" % (msg,))
9774

    
9775
    # if we managed to detach at least one, we update all the disks of
9776
    # the instance to point to the new secondary
9777
    self.lu.LogInfo("Updating instance configuration")
9778
    for dev, _, new_logical_id in iv_names.itervalues():
9779
      dev.logical_id = new_logical_id
9780
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9781

    
9782
    self.cfg.Update(self.instance, feedback_fn)
9783

    
9784
    # and now perform the drbd attach
9785
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9786
                    " (standalone => connected)")
9787
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9788
                                            self.new_node],
9789
                                           self.node_secondary_ip,
9790
                                           self.instance.disks,
9791
                                           self.instance.name,
9792
                                           False)
9793
    for to_node, to_result in result.items():
9794
      msg = to_result.fail_msg
9795
      if msg:
9796
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9797
                           to_node, msg,
9798
                           hint=("please do a gnt-instance info to see the"
9799
                                 " status of disks"))
9800
    cstep = 5
9801
    if self.early_release:
9802
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9803
      cstep += 1
9804
      self._RemoveOldStorage(self.target_node, iv_names)
9805
      # WARNING: we release all node locks here, do not do other RPCs
9806
      # than WaitForSync to the primary node
9807
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9808
                    names=[self.instance.primary_node,
9809
                           self.target_node,
9810
                           self.new_node])
9811

    
9812
    # Wait for sync
9813
    # This can fail as the old devices are degraded and _WaitForSync
9814
    # does a combined result over all disks, so we don't check its return value
9815
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9816
    cstep += 1
9817
    _WaitForSync(self.lu, self.instance)
9818

    
9819
    # Check all devices manually
9820
    self._CheckDevices(self.instance.primary_node, iv_names)
9821

    
9822
    # Step: remove old storage
9823
    if not self.early_release:
9824
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9825
      self._RemoveOldStorage(self.target_node, iv_names)
9826

    
9827

    
9828
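# NOTE: illustrative sketch only -- the two helpers below are NOT part of the
# original cmdlib.py and are never called.  They restate, as pure data
# manipulation, the bookkeeping done by _ExecDrbd8DiskOnly (LV rename lists)
# and _ExecDrbd8Secondary (DRBD logical_id rewiring) above, so the algorithm
# descriptions in those docstrings are easier to follow.  The helper names and
# the doctest values are hypothetical.

def _SketchLvRenamePairs(old_ids, new_ids, suffix):
  """Return the two rename lists used when swapping LVs under a DRBD device.

  @param old_ids: (vg, lv_name) tuples of the LVs currently attached
  @param new_ids: (vg, lv_name) tuples of the freshly created LVs
  @param suffix: unique suffix, e.g. int(time.time())

  >>> o2t, n2o = _SketchLvRenamePairs([("xenvg", "data")],
  ...                                 [("xenvg", "data.new")], 42)
  >>> o2t
  [(('xenvg', 'data'), ('xenvg', 'data_replaced-42'))]
  >>> n2o
  [(('xenvg', 'data.new'), ('xenvg', 'data'))]

  """
  # step 1: move the old LVs out of the way (they are removed only later)
  old_to_temp = [(oid, (oid[0], oid[1] + "_replaced-%s" % suffix))
                 for oid in old_ids]
  # step 2: give the new LVs the names the DRBD device expects
  new_to_old = [(nid, oid) for (oid, nid) in zip(old_ids, new_ids)]
  return (old_to_temp, new_to_old)


def _SketchDrbdSecondaryIds(logical_id, primary_node, new_node, new_minor):
  """Derive the standalone and networked ids for a replaced DRBD secondary.

  A DRBD8 logical_id is (nodeA, nodeB, port, minorA, minorB, secret); the
  standalone variant carries no port, so the device is brought up without
  networking first and attached to the network in a later step.

  >>> _SketchDrbdSecondaryIds(("p", "old", 11000, 0, 1, "s"), "p", "new", 7)
  (('p', 'new', None, 0, 7, 's'), ('p', 'new', 11000, 0, 7, 's'))

  """
  (node_a, _, port, minor_a, minor_b, secret) = logical_id
  if primary_node == node_a:
    p_minor = minor_a
  else:
    p_minor = minor_b
  alone_id = (primary_node, new_node, None, p_minor, new_minor, secret)
  net_id = (primary_node, new_node, port, p_minor, new_minor, secret)
  return (alone_id, net_id)

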
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
9888

    
9889

    
9890
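# NOTE: illustrative sketch only (not part of the original cmdlib.py, never
# called): the error-handling pattern used by
# LURepairNodeStorage._CheckFaultyDisks above -- run a prerequisite check and,
# if an "ignore consistency" flag is set, downgrade its failure to a warning
# instead of aborting the opcode.  _SketchCheckOrWarn and the callables passed
# to it in the doctest are hypothetical.
def _SketchCheckOrWarn(check_fn, ignore_failures, warn_fn):
  """Run check_fn(); re-raise its OpPrereqError unless failures are ignored.

  >>> def _boom():
  ...   raise errors.OpPrereqError("faulty disks")
  >>> seen = []
  >>> _SketchCheckOrWarn(_boom, True, seen.append)
  >>> seen
  ['faulty disks']

  """
  try:
    check_fn()
  except errors.OpPrereqError, err:
    if ignore_failures:
      warn_fn(str(err.args[0]))
    else:
      raise

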
class LUNodeEvacuate(NoHooksLU):
9891
  """Evacuates instances off a list of nodes.
9892

9893
  """
9894
  REQ_BGL = False
9895

    
9896
  def CheckArguments(self):
9897
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9898

    
9899
  def ExpandNames(self):
9900
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9901

    
9902
    if self.op.remote_node is not None:
9903
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9904
      assert self.op.remote_node
9905

    
9906
      if self.op.remote_node == self.op.node_name:
9907
        raise errors.OpPrereqError("Can not use evacuated node as a new"
9908
                                   " secondary node", errors.ECODE_INVAL)
9909

    
9910
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
9911
        raise errors.OpPrereqError("Without the use of an iallocator only"
9912
                                   " secondary instances can be evacuated",
9913
                                   errors.ECODE_INVAL)
9914

    
9915
    # Declare locks
9916
    self.share_locks = _ShareAll()
9917
    self.needed_locks = {
9918
      locking.LEVEL_INSTANCE: [],
9919
      locking.LEVEL_NODEGROUP: [],
9920
      locking.LEVEL_NODE: [],
9921
      }
9922

    
9923
    if self.op.remote_node is None:
9924
      # Iallocator will choose any node(s) in the same group
9925
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
9926
    else:
9927
      group_nodes = frozenset([self.op.remote_node])
9928

    
9929
    # Determine nodes to be locked
9930
    self.lock_nodes = set([self.op.node_name]) | group_nodes
9931

    
9932
  def _DetermineInstances(self):
9933
    """Builds list of instances to operate on.
9934

9935
    """
9936
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
9937

    
9938
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
9939
      # Primary instances only
9940
      inst_fn = _GetNodePrimaryInstances
9941
      assert self.op.remote_node is None, \
9942
        "Evacuating primary instances requires iallocator"
9943
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
9944
      # Secondary instances only
9945
      inst_fn = _GetNodeSecondaryInstances
9946
    else:
9947
      # All instances
9948
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
9949
      inst_fn = _GetNodeInstances
9950

    
9951
    return inst_fn(self.cfg, self.op.node_name)
9952

    
9953
  def DeclareLocks(self, level):
9954
    if level == locking.LEVEL_INSTANCE:
9955
      # Lock instances optimistically, needs verification once node and group
9956
      # locks have been acquired
9957
      self.needed_locks[locking.LEVEL_INSTANCE] = \
9958
        set(i.name for i in self._DetermineInstances())
9959

    
9960
    elif level == locking.LEVEL_NODEGROUP:
9961
      # Lock node groups optimistically, needs verification once nodes have
9962
      # been acquired
9963
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9964
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
9965

    
9966
    elif level == locking.LEVEL_NODE:
9967
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
9968

    
9969
  def CheckPrereq(self):
9970
    # Verify locks
9971
    owned_instances = self.glm.list_owned(locking.LEVEL_INSTANCE)
9972
    owned_nodes = self.glm.list_owned(locking.LEVEL_NODE)
9973
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
9974

    
9975
    assert owned_nodes == self.lock_nodes
9976

    
9977
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
9978
    if owned_groups != wanted_groups:
9979
      raise errors.OpExecError("Node groups changed since locks were acquired,"
9980
                               " current groups are '%s', used to be '%s'" %
9981
                               (utils.CommaJoin(wanted_groups),
9982
                                utils.CommaJoin(owned_groups)))
9983

    
9984
    # Determine affected instances
9985
    self.instances = self._DetermineInstances()
9986
    self.instance_names = [i.name for i in self.instances]
9987

    
9988
    if set(self.instance_names) != owned_instances:
9989
      raise errors.OpExecError("Instances on node '%s' changed since locks"
9990
                               " were acquired, current instances are '%s',"
9991
                               " used to be '%s'" %
9992
                               (self.op.node_name,
9993
                                utils.CommaJoin(self.instance_names),
9994
                                utils.CommaJoin(owned_instances)))
9995

    
9996
    if self.instance_names:
9997
      self.LogInfo("Evacuating instances from node '%s': %s",
9998
                   self.op.node_name,
9999
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10000
    else:
10001
      self.LogInfo("No instances to evacuate from node '%s'",
10002
                   self.op.node_name)
10003

    
10004
    if self.op.remote_node is not None:
10005
      for i in self.instances:
10006
        if i.primary_node == self.op.remote_node:
10007
          raise errors.OpPrereqError("Node %s is the primary node of"
10008
                                     " instance %s, cannot use it as"
10009
                                     " secondary" %
10010
                                     (self.op.remote_node, i.name),
10011
                                     errors.ECODE_INVAL)
10012

    
10013
  def Exec(self, feedback_fn):
10014
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10015

    
10016
    if not self.instance_names:
10017
      # No instances to evacuate
10018
      jobs = []
10019

    
10020
    elif self.op.iallocator is not None:
10021
      # TODO: Implement relocation to other group
10022
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10023
                       evac_mode=self.op.mode,
10024
                       instances=list(self.instance_names))
10025

    
10026
      ial.Run(self.op.iallocator)
10027

    
10028
      if not ial.success:
10029
        raise errors.OpPrereqError("Can't compute node evacuation using"
10030
                                   " iallocator '%s': %s" %
10031
                                   (self.op.iallocator, ial.info),
10032
                                   errors.ECODE_NORES)
10033

    
10034
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10035

    
10036
    elif self.op.remote_node is not None:
10037
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10038
      jobs = [
10039
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10040
                                        remote_node=self.op.remote_node,
10041
                                        disks=[],
10042
                                        mode=constants.REPLACE_DISK_CHG,
10043
                                        early_release=self.op.early_release)]
10044
        for instance_name in self.instance_names
10045
        ]
10046

    
10047
    else:
10048
      raise errors.ProgrammerError("No iallocator or remote node")
10049

    
10050
    return ResultWithJobs(jobs)
10051

    
10052

    
10053
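# NOTE: illustrative sketch only (not part of the original cmdlib.py, never
# called): the "optimistic locking" pattern used by LUNodeEvacuate above --
# the instance and group names are computed before the locks are acquired, so
# CheckPrereq has to recompute them afterwards and abort if the cluster
# changed in between.  The helper name _SketchVerifyLockedSet is hypothetical.
def _SketchVerifyLockedSet(what, owned, wanted):
  """Raise OpExecError if the owned and the recomputed name sets differ.

  >>> _SketchVerifyLockedSet("instances", set(["a", "b"]), set(["b", "a"]))

  """
  if set(owned) != set(wanted):
    raise errors.OpExecError("%s changed since locks were acquired, current"
                             " set is '%s', used to be '%s'" %
                             (what, utils.CommaJoin(sorted(wanted)),
                              utils.CommaJoin(sorted(owned))))

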
def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op
10063

    
10064

    
10065
def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group
10073

    
10074

    
10075
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    lu.LogWarning("Unable to evacuate instances %s",
                  utils.CommaJoin("%s (%s)" % (name, reason)
                                  for (name, reason) in failed))

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]
10107

    
10108

    
10109
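# NOTE: illustrative sketch only (not part of the original cmdlib.py, never
# called): the shape of the data handled by _LoadNodeEvacResult above.  The
# iallocator returns (moved, failed, jobs); "jobs" is a list of jobs, each of
# them a list of serialized opcodes.  The stand-in loader in the doctest is
# hypothetical -- the real code goes through opcodes.OpCode.LoadOpCode and
# _SetOpEarlyRelease instead.
def _SketchUnpackEvacResult(alloc_result, load_fn):
  """Return the failed entries and the per-job lists of loaded opcodes.

  >>> result = ([("inst1", "group1", ["node2"])],
  ...           [("inst2", "disk busy")],
  ...           [[{"OP_ID": "OP_INSTANCE_MIGRATE"}]])
  >>> failed, jobs = _SketchUnpackEvacResult(result, lambda op: op["OP_ID"])
  >>> failed
  [('inst2', 'disk busy')]
  >>> jobs
  [['OP_INSTANCE_MIGRATE']]

  """
  (_, failed, jobs) = alloc_result
  # each inner list is one job to submit; each entry one serialized opcode
  return (failed, [[load_fn(op) for op in ops] for ops in jobs])

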
class LUInstanceGrowDisk(LogicalUnit):
10110
  """Grow a disk of an instance.
10111

10112
  """
10113
  HPATH = "disk-grow"
10114
  HTYPE = constants.HTYPE_INSTANCE
10115
  REQ_BGL = False
10116

    
10117
  def ExpandNames(self):
10118
    self._ExpandAndLockInstance()
10119
    self.needed_locks[locking.LEVEL_NODE] = []
10120
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10121

    
10122
  def DeclareLocks(self, level):
10123
    if level == locking.LEVEL_NODE:
10124
      self._LockInstancesNodes()
10125

    
10126
  def BuildHooksEnv(self):
10127
    """Build hooks env.
10128

10129
    This runs on the master, the primary and all the secondaries.
10130

10131
    """
10132
    env = {
10133
      "DISK": self.op.disk,
10134
      "AMOUNT": self.op.amount,
10135
      }
10136
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10137
    return env
10138

    
10139
  def BuildHooksNodes(self):
10140
    """Build hooks nodes.
10141

10142
    """
10143
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10144
    return (nl, nl)
10145

    
10146
  def CheckPrereq(self):
10147
    """Check prerequisites.
10148

10149
    This checks that the instance is in the cluster.
10150

10151
    """
10152
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10153
    assert instance is not None, \
10154
      "Cannot retrieve locked instance %s" % self.op.instance_name
10155
    nodenames = list(instance.all_nodes)
10156
    for node in nodenames:
10157
      _CheckNodeOnline(self, node)
10158

    
10159
    self.instance = instance
10160

    
10161
    if instance.disk_template not in constants.DTS_GROWABLE:
10162
      raise errors.OpPrereqError("Instance's disk layout does not support"
10163
                                 " growing", errors.ECODE_INVAL)
10164

    
10165
    self.disk = instance.FindDisk(self.op.disk)
10166

    
10167
    if instance.disk_template not in (constants.DT_FILE,
10168
                                      constants.DT_SHARED_FILE):
10169
      # TODO: check the free disk space for file, when that feature will be
10170
      # supported
10171
      _CheckNodesFreeDiskPerVG(self, nodenames,
10172
                               self.disk.ComputeGrowth(self.op.amount))
10173

    
10174
  def Exec(self, feedback_fn):
10175
    """Execute disk grow.
10176

10177
    """
10178
    instance = self.instance
10179
    disk = self.disk
10180

    
10181
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10182
    if not disks_ok:
10183
      raise errors.OpExecError("Cannot activate block device to grow")
10184

    
10185
    # First run all grow ops in dry-run mode
10186
    for node in instance.all_nodes:
10187
      self.cfg.SetDiskID(disk, node)
10188
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10189
      result.Raise("Grow request failed to node %s" % node)
10190

    
10191
    # We know that (as far as we can test) operations across different
10192
    # nodes will succeed, time to run it for real
10193
    for node in instance.all_nodes:
10194
      self.cfg.SetDiskID(disk, node)
10195
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10196
      result.Raise("Grow request failed to node %s" % node)
10197

    
10198
      # TODO: Rewrite code to work properly
10199
      # DRBD goes into sync mode for a short amount of time after executing the
10200
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10201
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10202
      # time is a work-around.
10203
      time.sleep(5)
10204

    
10205
    disk.RecordGrow(self.op.amount)
10206
    self.cfg.Update(instance, feedback_fn)
10207
    if self.op.wait_for_sync:
10208
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10209
      if disk_abort:
10210
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10211
                             " status; please check the instance")
10212
      if not instance.admin_up:
10213
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10214
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even though the"
                           " instance is not supposed to be running, because"
                           " wait_for_sync was not requested")


10220
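# NOTE: illustrative sketch only (not part of the original cmdlib.py, never
# called): the two-phase pattern used by LUInstanceGrowDisk.Exec above -- the
# grow RPC is first run in dry-run mode on every node, and only if all dry
# runs succeed is it run for real, so a node that would fail is detected
# before anything is changed.  _SketchTwoPhase and the callback signature are
# hypothetical.
def _SketchTwoPhase(nodes, apply_fn):
  """Call apply_fn(node, dryrun=True) everywhere, then commit everywhere.

  apply_fn is expected to raise an exception to signal failure.

  >>> calls = []
  >>> _SketchTwoPhase(["node1", "node2"],
  ...                 lambda node, dryrun: calls.append((node, dryrun)))
  >>> calls
  [('node1', True), ('node2', True), ('node1', False), ('node2', False)]

  """
  for node in nodes:
    apply_fn(node, dryrun=True)
  # no dry run failed, so the real run is expected to succeed everywhere
  for node in nodes:
    apply_fn(node, dryrun=False)

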
class LUInstanceQueryData(NoHooksLU):
10221
  """Query runtime instance data.
10222

10223
  """
10224
  REQ_BGL = False
10225

    
10226
  def ExpandNames(self):
10227
    self.needed_locks = {}
10228

    
10229
    # Use locking if requested or when non-static information is wanted
10230
    if not (self.op.static or self.op.use_locking):
10231
      self.LogWarning("Non-static data requested, locks need to be acquired")
10232
      self.op.use_locking = True
10233

    
10234
    if self.op.instances or not self.op.use_locking:
10235
      # Expand instance names right here
10236
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10237
    else:
10238
      # Will use acquired locks
10239
      self.wanted_names = None
10240

    
10241
    if self.op.use_locking:
10242
      self.share_locks = _ShareAll()
10243

    
10244
      if self.wanted_names is None:
10245
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10246
      else:
10247
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10248

    
10249
      self.needed_locks[locking.LEVEL_NODE] = []
10250
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10251

    
10252
  def DeclareLocks(self, level):
10253
    if self.op.use_locking and level == locking.LEVEL_NODE:
10254
      self._LockInstancesNodes()
10255

    
10256
  def CheckPrereq(self):
10257
    """Check prerequisites.
10258

10259
    This only checks the optional instance list against the existing names.
10260

10261
    """
10262
    if self.wanted_names is None:
10263
      assert self.op.use_locking, "Locking was not used"
10264
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
10265

    
10266
    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
10267
                             for name in self.wanted_names]
10268

    
10269
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10270
    """Returns the status of a block device
10271

10272
    """
10273
    if self.op.static or not node:
10274
      return None
10275

    
10276
    self.cfg.SetDiskID(dev, node)
10277

    
10278
    result = self.rpc.call_blockdev_find(node, dev)
10279
    if result.offline:
10280
      return None
10281

    
10282
    result.Raise("Can't compute disk status for %s" % instance_name)
10283

    
10284
    status = result.payload
10285
    if status is None:
10286
      return None
10287

    
10288
    return (status.dev_path, status.major, status.minor,
10289
            status.sync_percent, status.estimated_time,
10290
            status.is_degraded, status.ldisk_status)
10291

    
10292
  def _ComputeDiskStatus(self, instance, snode, dev):
10293
    """Compute block device status.
10294

10295
    """
10296
    if dev.dev_type in constants.LDS_DRBD:
10297
      # we change the snode then (otherwise we use the one passed in)
10298
      if dev.logical_id[0] == instance.primary_node:
10299
        snode = dev.logical_id[1]
10300
      else:
10301
        snode = dev.logical_id[0]
10302

    
10303
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10304
                                              instance.name, dev)
10305
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10306

    
10307
    if dev.children:
10308
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10309
                                        instance, snode),
10310
                         dev.children)
10311
    else:
10312
      dev_children = []
10313

    
10314
    return {
10315
      "iv_name": dev.iv_name,
10316
      "dev_type": dev.dev_type,
10317
      "logical_id": dev.logical_id,
10318
      "physical_id": dev.physical_id,
10319
      "pstatus": dev_pstatus,
10320
      "sstatus": dev_sstatus,
10321
      "children": dev_children,
10322
      "mode": dev.mode,
10323
      "size": dev.size,
10324
      }
10325

    
10326
  def Exec(self, feedback_fn):
10327
    """Gather and return data"""
10328
    result = {}
10329

    
10330
    cluster = self.cfg.GetClusterInfo()
10331

    
10332
    for instance in self.wanted_instances:
10333
      pnode = self.cfg.GetNodeInfo(instance.primary_node)
10334

    
10335
      if self.op.static or pnode.offline:
10336
        remote_state = None
10337
        if pnode.offline:
10338
          self.LogWarning("Primary node %s is marked offline, returning static"
10339
                          " information only for instance %s" %
10340
                          (pnode.name, instance.name))
10341
      else:
10342
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10343
                                                  instance.name,
10344
                                                  instance.hypervisor)
10345
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10346
        remote_info = remote_info.payload
10347
        if remote_info and "state" in remote_info:
10348
          remote_state = "up"
10349
        else:
10350
          remote_state = "down"
10351

    
10352
      if instance.admin_up:
10353
        config_state = "up"
10354
      else:
10355
        config_state = "down"
10356

    
10357
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10358
                  instance.disks)
10359

    
10360
      result[instance.name] = {
10361
        "name": instance.name,
10362
        "config_state": config_state,
10363
        "run_state": remote_state,
10364
        "pnode": instance.primary_node,
10365
        "snodes": instance.secondary_nodes,
10366
        "os": instance.os,
10367
        # this happens to be the same format used for hooks
10368
        "nics": _NICListToTuple(self, instance.nics),
10369
        "disk_template": instance.disk_template,
10370
        "disks": disks,
10371
        "hypervisor": instance.hypervisor,
10372
        "network_port": instance.network_port,
10373
        "hv_instance": instance.hvparams,
10374
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10375
        "be_instance": instance.beparams,
10376
        "be_actual": cluster.FillBE(instance),
10377
        "os_instance": instance.osparams,
10378
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10379
        "serial_no": instance.serial_no,
10380
        "mtime": instance.mtime,
10381
        "ctime": instance.ctime,
10382
        "uuid": instance.uuid,
10383
        }
10384

    
10385
    return result


10388
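# NOTE: illustrative sketch only (not part of the original cmdlib.py, never
# called): the recursion used by LUInstanceQueryData._ComputeDiskStatus above
# -- a block device is described together with all of its children, so the
# status of a DRBD disk naturally embeds the status of its underlying LVs.
# _SketchDescribeDev and the (kind, children) tuples are hypothetical
# stand-ins for objects.Disk.
def _SketchDescribeDev(dev):
  """Return a nested dict describing a (kind, children) device tuple.

  >>> desc = _SketchDescribeDev(("drbd8", [("lv", []), ("lv", [])]))
  >>> desc["kind"]
  'drbd8'
  >>> [child["kind"] for child in desc["children"]]
  ['lv', 'lv']

  """
  (kind, children) = dev
  return {
    "kind": kind,
    "children": [_SketchDescribeDev(child) for child in children],
    }

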
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
10392
  HPATH = "instance-modify"
10393
  HTYPE = constants.HTYPE_INSTANCE
10394
  REQ_BGL = False
10395

    
10396
  def CheckArguments(self):
10397
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10398
            self.op.hvparams or self.op.beparams or self.op.os_name):
10399
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10400

    
10401
    if self.op.hvparams:
10402
      _CheckGlobalHvParams(self.op.hvparams)
10403

    
10404
    # Disk validation
10405
    disk_addremove = 0
10406
    for disk_op, disk_dict in self.op.disks:
10407
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10408
      if disk_op == constants.DDM_REMOVE:
10409
        disk_addremove += 1
10410
        continue
10411
      elif disk_op == constants.DDM_ADD:
10412
        disk_addremove += 1
10413
      else:
10414
        if not isinstance(disk_op, int):
10415
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10416
        if not isinstance(disk_dict, dict):
10417
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10418
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10419

    
10420
      if disk_op == constants.DDM_ADD:
10421
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10422
        if mode not in constants.DISK_ACCESS_SET:
10423
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10424
                                     errors.ECODE_INVAL)
10425
        size = disk_dict.get(constants.IDISK_SIZE, None)
10426
        if size is None:
10427
          raise errors.OpPrereqError("Required disk parameter size missing",
10428
                                     errors.ECODE_INVAL)
10429
        try:
10430
          size = int(size)
10431
        except (TypeError, ValueError), err:
10432
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10433
                                     str(err), errors.ECODE_INVAL)
10434
        disk_dict[constants.IDISK_SIZE] = size
10435
      else:
10436
        # modification of disk
10437
        if constants.IDISK_SIZE in disk_dict:
10438
          raise errors.OpPrereqError("Disk size change not possible, use"
10439
                                     " grow-disk", errors.ECODE_INVAL)
10440

    
10441
    if disk_addremove > 1:
10442
      raise errors.OpPrereqError("Only one disk add or remove operation"
10443
                                 " supported at a time", errors.ECODE_INVAL)
10444

    
10445
    if self.op.disks and self.op.disk_template is not None:
10446
      raise errors.OpPrereqError("Disk template conversion and other disk"
10447
                                 " changes not supported at the same time",
10448
                                 errors.ECODE_INVAL)
10449

    
10450
    if (self.op.disk_template and
10451
        self.op.disk_template in constants.DTS_INT_MIRROR and
10452
        self.op.remote_node is None):
10453
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10454
                                 " one requires specifying a secondary node",
10455
                                 errors.ECODE_INVAL)
10456

    
10457
    # NIC validation
10458
    nic_addremove = 0
10459
    for nic_op, nic_dict in self.op.nics:
10460
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10461
      if nic_op == constants.DDM_REMOVE:
10462
        nic_addremove += 1
10463
        continue
10464
      elif nic_op == constants.DDM_ADD:
10465
        nic_addremove += 1
10466
      else:
10467
        if not isinstance(nic_op, int):
10468
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10469
        if not isinstance(nic_dict, dict):
10470
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10471
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10472

    
10473
      # nic_dict should be a dict
10474
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10475
      if nic_ip is not None:
10476
        if nic_ip.lower() == constants.VALUE_NONE:
10477
          nic_dict[constants.INIC_IP] = None
10478
        else:
10479
          if not netutils.IPAddress.IsValid(nic_ip):
10480
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10481
                                       errors.ECODE_INVAL)
10482

    
10483
      nic_bridge = nic_dict.get("bridge", None)
10484
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10485
      if nic_bridge and nic_link:
10486
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10487
                                   " at the same time", errors.ECODE_INVAL)
10488
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10489
        nic_dict["bridge"] = None
10490
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10491
        nic_dict[constants.INIC_LINK] = None
10492

    
10493
      if nic_op == constants.DDM_ADD:
10494
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10495
        if nic_mac is None:
10496
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10497

    
10498
      if constants.INIC_MAC in nic_dict:
10499
        nic_mac = nic_dict[constants.INIC_MAC]
10500
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10501
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10502

    
10503
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10504
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10505
                                     " modifying an existing nic",
10506
                                     errors.ECODE_INVAL)
10507

    
10508
    if nic_addremove > 1:
10509
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10510
                                 " supported at a time", errors.ECODE_INVAL)
10511

    
10512
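  # NOTE: illustrative sketch only -- not part of the original LU and never
  # called.  It shows the shape of self.op.disks / self.op.nics as validated
  # in CheckArguments above: a list of (op, params) pairs where op is
  # constants.DDM_ADD, constants.DDM_REMOVE or the integer index of an
  # existing device.  The method name _SketchClassifyDdmOps is hypothetical.
  @staticmethod
  def _SketchClassifyDdmOps(mods):
    """Split DDM-style modifications into adds, removes and index edits.

    >>> LUInstanceSetParams._SketchClassifyDdmOps(
    ...   [(constants.DDM_ADD, {"size": 1024}), (0, {"mode": "ro"})])
    ([{'size': 1024}], [], [(0, {'mode': 'ro'})])

    """
    adds = [params for (op, params) in mods if op == constants.DDM_ADD]
    removes = [params for (op, params) in mods if op == constants.DDM_REMOVE]
    edits = [(op, params) for (op, params) in mods
             if op not in (constants.DDM_ADD, constants.DDM_REMOVE)]
    return (adds, removes, edits)
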
  def ExpandNames(self):
10513
    self._ExpandAndLockInstance()
10514
    self.needed_locks[locking.LEVEL_NODE] = []
10515
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10516

    
10517
  def DeclareLocks(self, level):
10518
    if level == locking.LEVEL_NODE:
10519
      self._LockInstancesNodes()
10520
      if self.op.disk_template and self.op.remote_node:
10521
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10522
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10523

    
10524
  def BuildHooksEnv(self):
10525
    """Build hooks env.
10526

10527
    This runs on the master, primary and secondaries.
10528

10529
    """
10530
    args = dict()
10531
    if constants.BE_MEMORY in self.be_new:
10532
      args["memory"] = self.be_new[constants.BE_MEMORY]
10533
    if constants.BE_VCPUS in self.be_new:
10534
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10535
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10536
    # information at all.
10537
    if self.op.nics:
10538
      args["nics"] = []
10539
      nic_override = dict(self.op.nics)
10540
      for idx, nic in enumerate(self.instance.nics):
10541
        if idx in nic_override:
10542
          this_nic_override = nic_override[idx]
10543
        else:
10544
          this_nic_override = {}
10545
        if constants.INIC_IP in this_nic_override:
10546
          ip = this_nic_override[constants.INIC_IP]
10547
        else:
10548
          ip = nic.ip
10549
        if constants.INIC_MAC in this_nic_override:
10550
          mac = this_nic_override[constants.INIC_MAC]
10551
        else:
10552
          mac = nic.mac
10553
        if idx in self.nic_pnew:
10554
          nicparams = self.nic_pnew[idx]
10555
        else:
10556
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10557
        mode = nicparams[constants.NIC_MODE]
10558
        link = nicparams[constants.NIC_LINK]
10559
        args["nics"].append((ip, mac, mode, link))
10560
      if constants.DDM_ADD in nic_override:
10561
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10562
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10563
        nicparams = self.nic_pnew[constants.DDM_ADD]
10564
        mode = nicparams[constants.NIC_MODE]
10565
        link = nicparams[constants.NIC_LINK]
10566
        args["nics"].append((ip, mac, mode, link))
10567
      elif constants.DDM_REMOVE in nic_override:
10568
        del args["nics"][-1]
10569

    
10570
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10571
    if self.op.disk_template:
10572
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10573

    
10574
    return env
10575

    
10576
  def BuildHooksNodes(self):
10577
    """Build hooks nodes.
10578

10579
    """
10580
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10581
    return (nl, nl)
10582

    
10583
  def CheckPrereq(self):
10584
    """Check prerequisites.
10585

10586
    This only checks the instance list against the existing names.
10587

10588
    """
10589
    # checking the new params on the primary/secondary nodes
10590

    
10591
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10592
    cluster = self.cluster = self.cfg.GetClusterInfo()
10593
    assert self.instance is not None, \
10594
      "Cannot retrieve locked instance %s" % self.op.instance_name
10595
    pnode = instance.primary_node
10596
    nodelist = list(instance.all_nodes)
10597

    
10598
    # OS change
10599
    if self.op.os_name and not self.op.force:
10600
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10601
                      self.op.force_variant)
10602
      instance_os = self.op.os_name
10603
    else:
10604
      instance_os = instance.os
10605

    
10606
    if self.op.disk_template:
10607
      if instance.disk_template == self.op.disk_template:
10608
        raise errors.OpPrereqError("Instance already has disk template %s" %
10609
                                   instance.disk_template, errors.ECODE_INVAL)
10610

    
10611
      if (instance.disk_template,
10612
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10613
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10614
                                   " %s to %s" % (instance.disk_template,
10615
                                                  self.op.disk_template),
10616
                                   errors.ECODE_INVAL)
10617
      _CheckInstanceDown(self, instance, "cannot change disk template")
10618
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10619
        if self.op.remote_node == pnode:
10620
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10621
                                     " as the primary node of the instance" %
10622
                                     self.op.remote_node, errors.ECODE_STATE)
10623
        _CheckNodeOnline(self, self.op.remote_node)
10624
        _CheckNodeNotDrained(self, self.op.remote_node)
10625
        # FIXME: here we assume that the old instance type is DT_PLAIN
10626
        assert instance.disk_template == constants.DT_PLAIN
10627
        disks = [{constants.IDISK_SIZE: d.size,
10628
                  constants.IDISK_VG: d.logical_id[0]}
10629
                 for d in instance.disks]
10630
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10631
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10632

    
10633
    # hvparams processing
10634
    if self.op.hvparams:
10635
      hv_type = instance.hypervisor
10636
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10637
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10638
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10639

    
10640
      # local check
10641
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10642
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10643
      self.hv_new = hv_new # the new actual values
10644
      self.hv_inst = i_hvdict # the new dict (without defaults)
10645
    else:
10646
      self.hv_new = self.hv_inst = {}
10647

    
10648
    # beparams processing
10649
    if self.op.beparams:
10650
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10651
                                   use_none=True)
10652
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10653
      be_new = cluster.SimpleFillBE(i_bedict)
10654
      self.be_new = be_new # the new actual values
10655
      self.be_inst = i_bedict # the new dict (without defaults)
10656
    else:
10657
      self.be_new = self.be_inst = {}
10658
    be_old = cluster.FillBE(instance)
10659

    
10660
    # osparams processing
10661
    if self.op.osparams:
10662
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10663
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10664
      self.os_inst = i_osdict # the new dict (without defaults)
10665
    else:
10666
      self.os_inst = {}
10667

    
10668
    self.warn = []
10669

    
10670
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10671
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10672
      mem_check_list = [pnode]
10673
      if be_new[constants.BE_AUTO_BALANCE]:
10674
        # either we changed auto_balance to yes or it was from before
10675
        mem_check_list.extend(instance.secondary_nodes)
10676
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10677
                                                  instance.hypervisor)
10678
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10679
                                         instance.hypervisor)
10680
      pninfo = nodeinfo[pnode]
10681
      msg = pninfo.fail_msg
10682
      if msg:
10683
        # Assume the primary node is unreachable and go ahead
10684
        self.warn.append("Can't get info from primary node %s: %s" %
10685
                         (pnode,  msg))
10686
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
10687
        self.warn.append("Node data from primary node %s doesn't contain"
10688
                         " free memory information" % pnode)
10689
      elif instance_info.fail_msg:
10690
        self.warn.append("Can't get instance runtime information: %s" %
10691
                        instance_info.fail_msg)
10692
      else:
10693
        if instance_info.payload:
10694
          current_mem = int(instance_info.payload["memory"])
10695
        else:
10696
          # Assume instance not running
10697
          # (there is a slight race condition here, but it's not very probable,
10698
          # and we have no other way to check)
10699
          current_mem = 0
10700
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10701
                    pninfo.payload["memory_free"])
10702
        if miss_mem > 0:
10703
          raise errors.OpPrereqError("This change will prevent the instance"
10704
                                     " from starting, due to %d MB of memory"
10705
                                     " missing on its primary node" % miss_mem,
10706
                                     errors.ECODE_NORES)
10707

    
10708
      if be_new[constants.BE_AUTO_BALANCE]:
10709
        for node, nres in nodeinfo.items():
10710
          if node not in instance.secondary_nodes:
10711
            continue
10712
          nres.Raise("Can't get info from secondary node %s" % node,
10713
                     prereq=True, ecode=errors.ECODE_STATE)
10714
          if not isinstance(nres.payload.get("memory_free", None), int):
10715
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10716
                                       " memory information" % node,
10717
                                       errors.ECODE_STATE)
10718
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10719
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failing over to its secondary"
                                       " node %s, due to not enough memory" %
                                       node, errors.ECODE_STATE)
10723

    
10724
    # NIC processing
10725
    self.nic_pnew = {}
10726
    self.nic_pinst = {}
10727
    for nic_op, nic_dict in self.op.nics:
10728
      if nic_op == constants.DDM_REMOVE:
10729
        if not instance.nics:
10730
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10731
                                     errors.ECODE_INVAL)
10732
        continue
10733
      if nic_op != constants.DDM_ADD:
10734
        # an existing nic
10735
        if not instance.nics:
10736
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if "bridge" in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError("Cannot set the nic ip to None"
                                     " on a routed nic", errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError("Cannot set the nic mac to None",
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    return

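  # The two _Convert* helpers below are the targets of the _DISK_CONVERSIONS
  # dispatch table defined at the end of this class.  They are only reached
  # from Exec() after CheckPrereq has validated the request (the instance is
  # expected to be stopped at that point); the plain->drbd direction
  # additionally relies on self.op.remote_node as the new secondary node.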
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

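  # Exec() below applies the requested changes in a fixed order -- disk
  # add/remove/modify, disk template conversion, NIC changes, then hv/be/OS
  # parameters -- and collects a list of (parameter, new value) pairs which
  # is returned to the caller for display.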
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                        constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
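  # The table above maps (old_template, new_template) pairs to conversion
  # helpers; Exec() invokes them as plain functions, passing the LU instance
  # explicitly, e.g.:
  #   self._DISK_CONVERSIONS[(constants.DT_PLAIN, constants.DT_DRBD8)](self,
  #                                                                    feedback_fn)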


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


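# LUBackupPrepare is the first half of a remote export: the handshake,
# signed X509 CA and HMAC-protected key name it returns are the pieces a
# subsequent remote export is expected to pass back in via the
# x509_key_name and destination_x509_ca parameters checked by
# LUBackupExport below.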
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before")

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

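  # The overall export flow implemented below: optionally shut the instance
  # down, snapshot its disks via masterd.instance.ExportInstanceHelper,
  # restart it if it was running and is not being removed, transfer the
  # snapshots (locally or to the remote destination), and finally remove the
  # instance and/or stale exports if requested.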
  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance,
                                                None, None, False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


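# Note on locking in LUGroupAdd: the group does not exist yet, so ExpandNames
# registers the freshly generated UUID through add_locks rather than
# needed_locks; Exec() then deletes the corresponding remove_locks entry,
# presumably so the lock created for the new group is not torn down again
# once the group really exists.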
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    # FIXME: Depends on side-effects of modifying the result of
    # C{cfg.GetAllNodesInfo}

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

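  # Illustration: with node1 in group A and node2 in group B, a DRBD instance
  # on (node1, node2) counts as previously split; a request moving node2 to
  # group A removes the split, while moving only one node of a currently
  # co-located pair into another group shows up in the "new splits" list.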
  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and end up split as
      a consequence of this change, and a list of instances that were
      previously split and that this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))


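# _GroupQuery implements the node group query backend on top of _QueryBase;
# field definitions live in query.GROUP_FIELDS, and LUGroupQuery below only
# wraps it for the old-style query interface (OldStyleQuery).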
class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raise errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


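# LUGroupEvacuate does not move instances itself: it asks the configured
# iallocator (IALLOCATOR_MODE_CHG_GROUP) for an evacuation plan and hands the
# resulting opcodes back wrapped in ResultWithJobs rather than executing them
# inline.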
class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    if not self.op.iallocator:
      # Use default iallocator
      self.op.iallocator = self.cfg.GetDefaultIAllocator()

    if not self.op.iallocator:
      raise errors.OpPrereqError("No iallocator was specified, neither in the"
                                 " opcode nor as a cluster-wide default",
                                 errors.ECODE_INVAL)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

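  # Locking strategy: instances in the source group are locked optimistically
  # at LEVEL_INSTANCE, the affected node groups at LEVEL_NODEGROUP, and the
  # group's member nodes at LEVEL_NODE; CheckPrereq() then re-checks that the
  # instance and group membership seen under the locks still matches what was
  # assumed when the locks were computed.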
  def DeclareLocks(self, level):
12034
    if level == locking.LEVEL_INSTANCE:
12035
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12036

    
12037
      # Lock instances optimistically, needs verification once node and group
12038
      # locks have been acquired
12039
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12040
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12041

    
12042
    elif level == locking.LEVEL_NODEGROUP:
12043
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12044

    
12045
      if self.req_target_uuids:
12046
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12047

    
12048
        # Lock all groups used by instances optimistically; this requires going
12049
        # via the node before it's locked, requiring verification later on
12050
        lock_groups.update(group_uuid
12051
                           for instance_name in
12052
                             self.glm.list_owned(locking.LEVEL_INSTANCE)
12053
                           for group_uuid in
12054
                             self.cfg.GetInstanceNodeGroups(instance_name))
12055
      else:
12056
        # No target groups, need to lock all of them
12057
        lock_groups = locking.ALL_SET
12058

    
12059
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12060

    
12061
    elif level == locking.LEVEL_NODE:
12062
      # This will only lock the nodes in the group to be evacuated which
12063
      # contain actual instances
12064
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12065
      self._LockInstancesNodes()
12066

    
12067
      # Lock all nodes in group to be evacuated
12068
      assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
12069
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
12070
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12071

    
12072
  def CheckPrereq(self):
12073
    owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
12074
    owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
12075
    owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))
12076

    
12077
    assert owned_groups.issuperset(self.req_target_uuids)
12078
    assert self.group_uuid in owned_groups
12079

    
12080
    # Check if locked instances are still correct
12081
    wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
12082
    if owned_instances != wanted_instances:
12083
      raise errors.OpPrereqError("Instances in node group to be evacuated (%s)"
12084
                                 " changed since locks were acquired, wanted"
12085
                                 " %s, have %s; retry the operation" %
12086
                                 (self.group_uuid,
12087
                                  utils.CommaJoin(wanted_instances),
12088
                                  utils.CommaJoin(owned_instances)),
12089
                                 errors.ECODE_STATE)
12090

    
12091
    # Get instance information
12092
    self.instances = dict((name, self.cfg.GetInstanceInfo(name))
12093
                          for name in owned_instances)
12094

    
12095
    # Check if node groups for locked instances are still correct
12096
    for instance_name in owned_instances:
12097
      inst = self.instances[instance_name]
12098
      assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
12099
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12100
      assert owned_nodes.issuperset(inst.all_nodes), \
12101
        "Instance %s's nodes changed while we kept the lock" % instance_name
12102

    
12103
      inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
12104
      if not owned_groups.issuperset(inst_groups):
12105
        raise errors.OpPrereqError("Instance %s's node groups changed since"
12106
                                   " locks were acquired, current groups"
12107
                                   " are '%s', owning groups '%s'; retry the"
12108
                                   " operation" %
12109
                                   (instance_name,
12110
                                    utils.CommaJoin(inst_groups),
12111
                                    utils.CommaJoin(owned_groups)),
12112
                                   errors.ECODE_STATE)
12113

    
12114
    if self.req_target_uuids:
12115
      # User requested specific target groups
12116
      self.target_uuids = self.req_target_uuids
12117
    else:
12118
      # All groups except the one to be evacuated are potential targets
12119
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12120
                           if group_uuid != self.group_uuid]
12121

    
12122
      if not self.target_uuids:
12123
        raise errors.OpExecError("There are no possible target groups")
12124

    
12125
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

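  # Exec() below hands the evacuation off to the cluster's iallocator in
  # "change group" mode.  Roughly (illustrative only; the authoritative key
  # list lives in IAllocator._MODE_DATA further down), the request part of
  # the generated input looks like:
  #   {"type": constants.IALLOCATOR_MODE_CHG_GROUP,
  #    "instances": ["inst1.example.com", ...],
  #    "target_groups": ["<target group UUID>", ...]}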
  def Exec(self, feedback_fn):
    instances = list(self.glm.list_owned(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

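  # The result of Exec() is a flat list of (path, tag) pairs collected from
  # all taggable objects, e.g. (tag values are hypothetical):
  #   [("/cluster", "environment:prod"),
  #    ("/instances/inst1.example.com", "environment:prod")]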
  def Exec(self, feedback_fn):
    """Returns the (path, tag) pairs matching the search pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

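  # Notification handshake used by this LU, roughly: _NotifyUsingSocket()
  # opens a temporary Unix socket, the callback publishes its path to the
  # test client via a job-queue test log entry, and the LU then waits first
  # for the client to connect and afterwards for it to confirm (or close)
  # before the opcode proceeds.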
  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


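# Rough life cycle of an IAllocator object (summary, not an API contract):
# an LU constructs it with mode-specific keyword arguments, __init__
# serializes the cluster state plus the request into in_text, Run() ships
# that text to the iallocator script via RPC on the master node, and
# _ValidateResult() parses the script's JSON reply into the
# success/info/result attributes that the calling LU inspects.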
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has the following sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

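  # Typical construction, mirroring the callers elsewhere in this module
  # (sketch; the instance and node names are made up):
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
  #                    name="inst1.example.com",
  #                    relocate_from=["node2.example.com"])
  #   ial.Run(self.op.iallocator)
  #   if not ial.success:
  #     ...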
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

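  # Abridged shape of self.in_data as built above ("request" is added later
  # by _BuildInputData):
  #   {"version": ..., "cluster_name": ..., "cluster_tags": [...],
  #    "enabled_hypervisors": [...], "nodegroups": {...},
  #    "nodes": {...}, "instances": {...}}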
  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

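  # Roughly what _AddNewInstance() returns for a two-node (DRBD) instance;
  # all values below are made up for illustration:
  #   {"name": "inst1.example.com", "disk_template": "drbd", "tags": [],
  #    "os": "debian-image", "vcpus": 1, "memory": 512,
  #    "disks": [{"size": 1024, "mode": "rw"}], "disk_space_total": 1152,
  #    "nics": [...], "required_nodes": 2, "hypervisor": "xen-pvm"}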
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group-change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

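  # _BuildInputData() glues the pieces together: it computes the cluster
  # data, asks the mode-specific _Add* method (passed in from __init__) for
  # the "request" dict, checks the request against the keydata type list
  # from _MODE_DATA, and serializes the whole structure into self.in_text.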
  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable-msg=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

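  # A node-evacuation/change-group result matching _NEVAC_RESULT is a
  # three-element list of (moved, failed, jobs); sketched with made-up
  # values:
  #   [[["inst1.example.com", "<new group UUID>", ["node3.example.com"]]],
  #    [["inst2.example.com", "instance is not mirrored"]],
  #    [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]]]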
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_MEVAC:
      (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
       ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

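  # A well-formed reply from the script is a JSON object with at least the
  # keys checked below, e.g. for an allocation request (node names are
  # hypothetical):
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}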
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
                     constants.IALLOCATOR_MODE_MEVAC):
      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      if self.mode == constants.IALLOCATOR_MODE_RELOC:
        assert self.relocate_from is not None
        assert self.required_nodes == 1

        request_groups = fn(self.relocate_from)
        result_groups = fn(rdict["result"])

        if result_groups != request_groups:
          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                   " differ from original groups (%s)" %
                                   (utils.CommaJoin(result_groups),
                                    utils.CommaJoin(request_groups)))
      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
        request_groups = fn(self.evac_nodes)
        for (instance_name, secnode) in self.result:
          result_groups = fn([secnode])
          if result_groups != request_groups:
            raise errors.OpExecError("Iallocator returned new secondary node"
                                     " '%s' (group '%s') for instance '%s'"
                                     " which is not in original group '%s'" %
                                     (secnode, utils.CommaJoin(result_groups),
                                      instance_name,
                                      utils.CommaJoin(request_groups)))
      else:
        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


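# LUTestAllocator is a debugging aid (the backend of "gnt-debug allocator"):
# in the "in" direction it only returns the generated iallocator input text,
# while in the "out" direction it also runs the named allocator script,
# returning its raw output without validating it.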
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


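# Example of how the table above is used (names as defined in this module):
#   _GetQueryImplementation(constants.QR_NODE) returns the _NodeQuery class,
#   which the query LUs then instantiate to run the actual query.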
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)