#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools
import operator

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht

import ganeti.masterd.instance # pylint: disable=W0611


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcodes.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs


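# Usage sketch (hypothetical keyword value, not part of the original module):
# an LU that wants follow-up work queued as separate jobs can end its Exec
# with something like
#
#   return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]],
#                         config_checked=True)
#
# mcpu.Processor._ProcessResult then submits each inner list as a job and
# merges the extra keyword values into the opcode result.

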
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged, but any LU can override it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to lock only some instances' nodes,
    or to lock only primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


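# Illustrative sketch (an assumption, not part of the original module): a
# minimal concurrent LU following the rules documented in LogicalUnit. It
# acquires no locks, has nothing to verify and simply reports the cluster
# name; the opcode that would normally map to it is left out on purpose.
class _ExampleClusterNoopLU(NoHooksLU):
  """Example LU: does nothing beyond returning the cluster name."""
  REQ_BGL = False

  def ExpandNames(self):
    # no names to expand and no locks needed for this no-op
    self.needed_locks = {}

  def CheckPrereq(self):
    # nothing to verify; an empty CheckPrereq is perfectly valid
    pass

  def Exec(self, feedback_fn):
    feedback_fn("Running example no-op LU")
    return self.cfg.GetClusterName()

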
class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


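# Illustrative sketch (an assumption, not part of the original module): a
# minimal tasklet. The owning LU is expected to have acquired whatever locks
# are needed before this runs; the tasklet itself never touches locking.
class _ExampleNoopTasklet(Tasklet):
  """Example tasklet: checks nothing and only emits feedback."""
  def __init__(self, lu, instance_name):
    Tasklet.__init__(self, lu)
    self.instance_name = instance_name

  def CheckPrereq(self):
    # nothing to verify for this no-op example
    pass

  def Exec(self, feedback_fn):
    feedback_fn("Example tasklet running for %s" % self.instance_name)

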
class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is of the wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is of the wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


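# Usage sketch for _GetUpdatedParams (hypothetical values): an update dict may
# either override a key or reset it by passing constants.VALUE_DEFAULT, in
# which case the key is dropped so the cluster-level default applies again:
#
#   old = {"memory": 512, "vcpus": 2}
#   new = _GetUpdatedParams(old, {"memory": constants.VALUE_DEFAULT,
#                                 "vcpus": 4})
#   # new == {"vcpus": 4}

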
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit owning the locks
  @type level: member of locking.LEVELS
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
         "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  if should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.owned_locks(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"


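# Usage sketch (hypothetical lock names): once an LU has decided which nodes
# it actually needs, it can give back everything else at that level, e.g.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.instance.primary_node])
#
# or drop the whole level by calling it with neither "names" nor "keep".

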
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance-related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


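# Resulting environment sketch (hypothetical one-NIC, one-disk instance): the
# dict built above would contain entries along the lines of
#
#   "INSTANCE_NAME": "inst1.example.com", "INSTANCE_STATUS": "up",
#   "INSTANCE_NIC_COUNT": 1, "INSTANCE_NIC0_MAC": "aa:00:00:11:22:33",
#   "INSTANCE_DISK_COUNT": 1, "INSTANCE_DISK0_SIZE": 1024,
#
# plus one "INSTANCE_BE_*"/"INSTANCE_HV_*" entry per backend and hypervisor
# parameter; the hooks runner later prefixes every key with "GANETI_".

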
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance-related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_up,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_deactivate_master_ip(master)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond


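# Formatting sketch (hypothetical values): for ecode=ENODENET, item="node1"
# and msg="missing bridge br0", _Error emits either
#
#   "ERROR:ENODENET:node:node1:missing bridge br0"   (opcode error_codes set)
#   "ERROR: node node1: missing bridge br0"          (plain, human-readable)
#
# prefixed with "  - " when reported through feedback_fn.

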
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([opcodes.OpClusterVerifyConfig()])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


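# Illustration (hypothetical group names): for a cluster with node groups
# "default" and "storage", LUClusterVerify.Exec above returns roughly
#   ResultWithJobs([[OpClusterVerifyConfig()],
#                   [OpClusterVerifyGroup(group_name="default",
#                                         depends=[(-1, [])])],
#                   [OpClusterVerifyGroup(group_name="storage",
#                                         depends=[(-2, [])])]])
# where the negative entries are relative job dependencies pointing back at
# the configuration-verification job submitted in the same batch.

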
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))

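  # Illustration (hypothetical values): hvp_data as consumed by _VerifyHVP
  # above is a list of (item, hv_name, hv_params) tuples, e.g.
  #   [("cluster", "kvm", {"kernel_path": "/boot/vmlinuz-3-kvmU"}),
  #    ("instance inst1.example.com", "kvm", {"boot_order": "cd"})]
  # and every parameter dict is checked purely locally, without any RPC.
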
  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of the cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
                  "the following nodes (and their instances) belong to a"
                  " non-existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1643
  """Verifies the status of a node group.
1644

1645
  """
1646
  HPATH = "cluster-verify"
1647
  HTYPE = constants.HTYPE_CLUSTER
1648
  REQ_BGL = False
1649

    
1650
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1651

    
1652
  class NodeImage(object):
1653
    """A class representing the logical and physical status of a node.
1654

1655
    @type name: string
1656
    @ivar name: the node name to which this object refers
1657
    @ivar volumes: a structure as returned from
1658
        L{ganeti.backend.GetVolumeList} (runtime)
1659
    @ivar instances: a list of running instances (runtime)
1660
    @ivar pinst: list of configured primary instances (config)
1661
    @ivar sinst: list of configured secondary instances (config)
1662
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1663
        instances for which this node is secondary (config)
1664
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1665
    @ivar dfree: free disk, as reported by the node (runtime)
1666
    @ivar offline: the offline status (config)
1667
    @type rpc_fail: boolean
1668
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1669
        not whether the individual keys were correct) (runtime)
1670
    @type lvm_fail: boolean
1671
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1672
    @type hyp_fail: boolean
1673
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1674
    @type ghost: boolean
1675
    @ivar ghost: whether this is a known node or not (config)
1676
    @type os_fail: boolean
1677
    @ivar os_fail: whether the RPC call didn't return valid OS data
1678
    @type oslist: list
1679
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1680
    @type vm_capable: boolean
1681
    @ivar vm_capable: whether the node can host instances
1682

1683
    """
1684
    def __init__(self, offline=False, name=None, vm_capable=True):
1685
      self.name = name
1686
      self.volumes = {}
1687
      self.instances = []
1688
      self.pinst = []
1689
      self.sinst = []
1690
      self.sbp = {}
1691
      self.mfree = 0
1692
      self.dfree = 0
1693
      self.offline = offline
1694
      self.vm_capable = vm_capable
1695
      self.rpc_fail = False
1696
      self.lvm_fail = False
1697
      self.hyp_fail = False
1698
      self.ghost = False
1699
      self.os_fail = False
1700
      self.oslist = {}
1701

    
1702
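  # Illustration (hypothetical node name): Exec() below builds one NodeImage
  # per node in the group, e.g.
  #   nimg = self.NodeImage(offline=False, name="node1.example.com",
  #                         vm_capable=True)
  # config-derived fields (pinst, sinst, sbp) are filled while walking the
  # instances, and the runtime fields (mfree, dfree, volumes, instances) only
  # after the node_verify RPC results come back.
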
  def ExpandNames(self):
1703
    # This raises errors.OpPrereqError on its own:
1704
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1705

    
1706
    # Get instances in node group; this is unsafe and needs verification later
1707
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1708

    
1709
    self.needed_locks = {
1710
      locking.LEVEL_INSTANCE: inst_names,
1711
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1712
      locking.LEVEL_NODE: [],
1713
      }
1714

    
1715
    self.share_locks = _ShareAll()
1716

    
1717
  def DeclareLocks(self, level):
1718
    if level == locking.LEVEL_NODE:
1719
      # Get members of node group; this is unsafe and needs verification later
1720
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1721

    
1722
      all_inst_info = self.cfg.GetAllInstancesInfo()
1723

    
1724
      # In Exec(), we warn about mirrored instances that have primary and
1725
      # secondary living in separate node groups. To fully verify that
1726
      # volumes for these instances are healthy, we will need to do an
1727
      # extra call to their secondaries. We ensure here those nodes will
1728
      # be locked.
1729
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1730
        # Important: access only the instances whose lock is owned
1731
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1732
          nodes.update(all_inst_info[inst].secondary_nodes)
1733

    
1734
      self.needed_locks[locking.LEVEL_NODE] = nodes
1735

    
1736
  def CheckPrereq(self):
1737
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1738
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1739

    
1740
    group_nodes = set(self.group_info.members)
1741
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1742

    
1743
    unlocked_nodes = \
1744
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1745

    
1746
    unlocked_instances = \
1747
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1748

    
1749
    if unlocked_nodes:
1750
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1751
                                 utils.CommaJoin(unlocked_nodes))
1752

    
1753
    if unlocked_instances:
1754
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1755
                                 utils.CommaJoin(unlocked_instances))
1756

    
1757
    self.all_node_info = self.cfg.GetAllNodesInfo()
1758
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1759

    
1760
    self.my_node_names = utils.NiceSort(group_nodes)
1761
    self.my_inst_names = utils.NiceSort(group_instances)
1762

    
1763
    self.my_node_info = dict((name, self.all_node_info[name])
1764
                             for name in self.my_node_names)
1765

    
1766
    self.my_inst_info = dict((name, self.all_inst_info[name])
1767
                             for name in self.my_inst_names)
1768

    
1769
    # We detect here the nodes that will need the extra RPC calls for verifying
1770
    # split LV volumes; they should be locked.
1771
    extra_lv_nodes = set()
1772

    
1773
    for inst in self.my_inst_info.values():
1774
      if inst.disk_template in constants.DTS_INT_MIRROR:
1775
        group = self.my_node_info[inst.primary_node].group
1776
        for nname in inst.secondary_nodes:
1777
          if self.all_node_info[nname].group != group:
1778
            extra_lv_nodes.add(nname)
1779

    
1780
    unlocked_lv_nodes = \
1781
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1782

    
1783
    if unlocked_lv_nodes:
1784
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1785
                                 utils.CommaJoin(unlocked_lv_nodes))
1786
    self.extra_lv_nodes = list(extra_lv_nodes)
1787

    
1788
  def _VerifyNode(self, ninfo, nresult):
1789
    """Perform some basic validation on data returned from a node.
1790

1791
      - check the result data structure is well formed and has all the
1792
        mandatory fields
1793
      - check ganeti version
1794

1795
    @type ninfo: L{objects.Node}
1796
    @param ninfo: the node to check
1797
    @param nresult: the results from the node
1798
    @rtype: boolean
1799
    @return: whether overall this call was successful (and we can expect
1800
         reasonable values in the response)
1801

1802
    """
1803
    node = ninfo.name
1804
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1805

    
1806
    # main result, nresult should be a non-empty dict
1807
    test = not nresult or not isinstance(nresult, dict)
1808
    _ErrorIf(test, self.ENODERPC, node,
1809
                  "unable to verify node: no data returned")
1810
    if test:
1811
      return False
1812

    
1813
    # compares ganeti version
1814
    local_version = constants.PROTOCOL_VERSION
1815
    remote_version = nresult.get("version", None)
1816
    test = not (remote_version and
1817
                isinstance(remote_version, (list, tuple)) and
1818
                len(remote_version) == 2)
1819
    _ErrorIf(test, self.ENODERPC, node,
1820
             "connection to node returned invalid data")
1821
    if test:
1822
      return False
1823

    
1824
    test = local_version != remote_version[0]
1825
    _ErrorIf(test, self.ENODEVERSION, node,
1826
             "incompatible protocol versions: master %s,"
1827
             " node %s", local_version, remote_version[0])
1828
    if test:
1829
      return False
1830

    
1831
    # node seems compatible, we can actually try to look into its results
1832

    
1833
    # full package version
1834
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1835
                  self.ENODEVERSION, node,
1836
                  "software version mismatch: master %s, node %s",
1837
                  constants.RELEASE_VERSION, remote_version[1],
1838
                  code=self.ETYPE_WARNING)
1839

    
1840
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1841
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1842
      for hv_name, hv_result in hyp_result.iteritems():
1843
        test = hv_result is not None
1844
        _ErrorIf(test, self.ENODEHV, node,
1845
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1846

    
1847
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1848
    if ninfo.vm_capable and isinstance(hvp_result, list):
1849
      for item, hv_name, hv_result in hvp_result:
1850
        _ErrorIf(True, self.ENODEHV, node,
1851
                 "hypervisor %s parameter verify failure (source %s): %s",
1852
                 hv_name, item, hv_result)
1853

    
1854
    test = nresult.get(constants.NV_NODESETUP,
1855
                       ["Missing NODESETUP results"])
1856
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1857
             "; ".join(test))
1858

    
1859
    return True
1860

    
1861
  def _VerifyNodeTime(self, ninfo, nresult,
1862
                      nvinfo_starttime, nvinfo_endtime):
1863
    """Check the node time.
1864

1865
    @type ninfo: L{objects.Node}
1866
    @param ninfo: the node to check
1867
    @param nresult: the remote results for the node
1868
    @param nvinfo_starttime: the start time of the RPC call
1869
    @param nvinfo_endtime: the end time of the RPC call
1870

1871
    """
1872
    node = ninfo.name
1873
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1874

    
1875
    ntime = nresult.get(constants.NV_TIME, None)
1876
    try:
1877
      ntime_merged = utils.MergeTime(ntime)
1878
    except (ValueError, TypeError):
1879
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1880
      return
1881

    
1882
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1883
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1884
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1885
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1886
    else:
1887
      ntime_diff = None
1888

    
1889
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1890
             "Node time diverges by at least %s from master node time",
1891
             ntime_diff)
1892

    
1893
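  # Worked example (assuming a NODE_MAX_CLOCK_SKEW of e.g. 150 seconds): with
  # an RPC window of [1000.0, 1002.0] and a merged node time of 1200.0,
  # ntime_diff becomes "198.0s" and the ENODETIME error above is raised; a
  # node whose clock falls inside the +/- skew window yields None and passes.
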
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1894
    """Check the node LVM results.
1895

1896
    @type ninfo: L{objects.Node}
1897
    @param ninfo: the node to check
1898
    @param nresult: the remote results for the node
1899
    @param vg_name: the configured VG name
1900

1901
    """
1902
    if vg_name is None:
1903
      return
1904

    
1905
    node = ninfo.name
1906
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1907

    
1908
    # checks vg existence and size > 20G
1909
    vglist = nresult.get(constants.NV_VGLIST, None)
1910
    test = not vglist
1911
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1912
    if not test:
1913
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1914
                                            constants.MIN_VG_SIZE)
1915
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1916

    
1917
    # check pv names
1918
    pvlist = nresult.get(constants.NV_PVLIST, None)
1919
    test = pvlist is None
1920
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1921
    if not test:
1922
      # check that ':' is not present in PV names, since it's a
1923
      # special character for lvcreate (denotes the range of PEs to
1924
      # use on the PV)
1925
      for _, pvname, owner_vg in pvlist:
1926
        test = ":" in pvname
1927
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1928
                 " '%s' of VG '%s'", pvname, owner_vg)
1929

    
1930
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1931
    """Check the node bridges.
1932

1933
    @type ninfo: L{objects.Node}
1934
    @param ninfo: the node to check
1935
    @param nresult: the remote results for the node
1936
    @param bridges: the expected list of bridges
1937

1938
    """
1939
    if not bridges:
1940
      return
1941

    
1942
    node = ninfo.name
1943
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1944

    
1945
    missing = nresult.get(constants.NV_BRIDGES, None)
1946
    test = not isinstance(missing, list)
1947
    _ErrorIf(test, self.ENODENET, node,
1948
             "did not return valid bridge information")
1949
    if not test:
1950
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1951
               utils.CommaJoin(sorted(missing)))
1952

    
1953
  def _VerifyNodeNetwork(self, ninfo, nresult):
1954
    """Check the node network connectivity results.
1955

1956
    @type ninfo: L{objects.Node}
1957
    @param ninfo: the node to check
1958
    @param nresult: the remote results for the node
1959

1960
    """
1961
    node = ninfo.name
1962
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1963

    
1964
    test = constants.NV_NODELIST not in nresult
1965
    _ErrorIf(test, self.ENODESSH, node,
1966
             "node hasn't returned node ssh connectivity data")
1967
    if not test:
1968
      if nresult[constants.NV_NODELIST]:
1969
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1970
          _ErrorIf(True, self.ENODESSH, node,
1971
                   "ssh communication with node '%s': %s", a_node, a_msg)
1972

    
1973
    test = constants.NV_NODENETTEST not in nresult
1974
    _ErrorIf(test, self.ENODENET, node,
1975
             "node hasn't returned node tcp connectivity data")
1976
    if not test:
1977
      if nresult[constants.NV_NODENETTEST]:
1978
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1979
        for anode in nlist:
1980
          _ErrorIf(True, self.ENODENET, node,
1981
                   "tcp communication with node '%s': %s",
1982
                   anode, nresult[constants.NV_NODENETTEST][anode])
1983

    
1984
    test = constants.NV_MASTERIP not in nresult
1985
    _ErrorIf(test, self.ENODENET, node,
1986
             "node hasn't returned node master IP reachability data")
1987
    if not test:
1988
      if not nresult[constants.NV_MASTERIP]:
1989
        if node == self.master_node:
1990
          msg = "the master node cannot reach the master IP (not configured?)"
1991
        else:
1992
          msg = "cannot reach the master IP"
1993
        _ErrorIf(True, self.ENODENET, node, msg)
1994

    
1995
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1996
                      diskstatus):
1997
    """Verify an instance.
1998

1999
    This function checks to see if the required block devices are
2000
    available on the instance's node.
2001

2002
    """
2003
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2004
    node_current = instanceconfig.primary_node
2005

    
2006
    node_vol_should = {}
2007
    instanceconfig.MapLVsByNode(node_vol_should)
2008

    
2009
    for node in node_vol_should:
2010
      n_img = node_image[node]
2011
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2012
        # ignore missing volumes on offline or broken nodes
2013
        continue
2014
      for volume in node_vol_should[node]:
2015
        test = volume not in n_img.volumes
2016
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2017
                 "volume %s missing on node %s", volume, node)
2018

    
2019
    if instanceconfig.admin_up:
2020
      pri_img = node_image[node_current]
2021
      test = instance not in pri_img.instances and not pri_img.offline
2022
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
2023
               "instance not running on its primary node %s",
2024
               node_current)
2025

    
2026
    diskdata = [(nname, success, status, idx)
2027
                for (nname, disks) in diskstatus.items()
2028
                for idx, (success, status) in enumerate(disks)]
2029

    
2030
    for nname, success, bdev_status, idx in diskdata:
2031
      # the 'ghost node' construction in Exec() ensures that we have a
2032
      # node here
2033
      snode = node_image[nname]
2034
      bad_snode = snode.ghost or snode.offline
2035
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2036
               self.EINSTANCEFAULTYDISK, instance,
2037
               "couldn't retrieve status for disk/%s on %s: %s",
2038
               idx, nname, bdev_status)
2039
      _ErrorIf((instanceconfig.admin_up and success and
2040
                bdev_status.ldisk_status == constants.LDS_FAULTY),
2041
               self.EINSTANCEFAULTYDISK, instance,
2042
               "disk/%s on %s is faulty", idx, nname)
2043

    
2044
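  # Illustration (hypothetical values): a diskstatus of
  #   {"node1.example.com": [(True, st0), (False, "node offline")]}
  # is flattened above into the diskdata entries
  #   ("node1.example.com", True, st0, 0) and
  #   ("node1.example.com", False, "node offline", 1)
  # so each disk index can be checked against its (possibly ghost) node image.
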
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2045
    """Verify if there are any unknown volumes in the cluster.
2046

2047
    The .os, .swap and backup volumes are ignored. All other volumes are
2048
    reported as unknown.
2049

2050
    @type reserved: L{ganeti.utils.FieldSet}
2051
    @param reserved: a FieldSet of reserved volume names
2052

2053
    """
2054
    for node, n_img in node_image.items():
2055
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2056
        # skip non-healthy nodes
2057
        continue
2058
      for volume in n_img.volumes:
2059
        test = ((node not in node_vol_should or
2060
                volume not in node_vol_should[node]) and
2061
                not reserved.Matches(volume))
2062
        self._ErrorIf(test, self.ENODEORPHANLV, node,
2063
                      "volume %s is unknown", volume)
2064

    
2065
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2066
    """Verify N+1 Memory Resilience.
2067

2068
    Check that if one single node dies we can still start all the
2069
    instances it was primary for.
2070

2071
    """
2072
    cluster_info = self.cfg.GetClusterInfo()
2073
    for node, n_img in node_image.items():
2074
      # This code checks that every node which is now listed as
2075
      # secondary has enough memory to host all instances it is
2076
      # supposed to should a single other node in the cluster fail.
2077
      # FIXME: not ready for failover to an arbitrary node
2078
      # FIXME: does not support file-backed instances
2079
      # WARNING: we currently take into account down instances as well
2080
      # as up ones, considering that even if they're down someone
2081
      # might want to start them even in the event of a node failure.
2082
      if n_img.offline:
2083
        # we're skipping offline nodes from the N+1 warning, since
2084
        # most likely we don't have good memory information from them;
2085
        # we already list instances living on such nodes, and that's
2086
        # enough warning
2087
        continue
2088
      for prinode, instances in n_img.sbp.items():
2089
        needed_mem = 0
2090
        for instance in instances:
2091
          bep = cluster_info.FillBE(instance_cfg[instance])
2092
          if bep[constants.BE_AUTO_BALANCE]:
2093
            needed_mem += bep[constants.BE_MEMORY]
2094
        test = n_img.mfree < needed_mem
2095
        self._ErrorIf(test, self.ENODEN1, node,
2096
                      "not enough memory to accomodate instance failovers"
2097
                      " should node %s fail (%dMiB needed, %dMiB available)",
2098
                      prinode, needed_mem, n_img.mfree)
2099

    
2100
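  # Worked example (hypothetical numbers): if this node is secondary for two
  # auto-balanced instances of primary node A with BE_MEMORY of 2048 and
  # 4096 MiB, needed_mem is 6144 MiB; with only 4096 MiB of mfree, the
  # ENODEN1 error above fires, because the node could not absorb A's
  # instances after a failover.
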
  @classmethod
2101
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2102
                   (files_all, files_opt, files_mc, files_vm)):
2103
    """Verifies file checksums collected from all nodes.
2104

2105
    @param errorif: Callback for reporting errors
2106
    @param nodeinfo: List of L{objects.Node} objects
2107
    @param master_node: Name of master node
2108
    @param all_nvinfo: RPC results
2109

2110
    """
2111
    # Define functions determining which nodes to consider for a file
2112
    files2nodefn = [
2113
      (files_all, None),
2114
      (files_mc, lambda node: (node.master_candidate or
2115
                               node.name == master_node)),
2116
      (files_vm, lambda node: node.vm_capable),
2117
      ]
2118

    
2119
    # Build mapping from filename to list of nodes which should have the file
2120
    nodefiles = {}
2121
    for (files, fn) in files2nodefn:
2122
      if fn is None:
2123
        filenodes = nodeinfo
2124
      else:
2125
        filenodes = filter(fn, nodeinfo)
2126
      nodefiles.update((filename,
2127
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2128
                       for filename in files)
2129

    
2130
    assert set(nodefiles) == (files_all | files_mc | files_vm)
2131

    
2132
    fileinfo = dict((filename, {}) for filename in nodefiles)
2133
    ignore_nodes = set()
2134

    
2135
    for node in nodeinfo:
2136
      if node.offline:
2137
        ignore_nodes.add(node.name)
2138
        continue
2139

    
2140
      nresult = all_nvinfo[node.name]
2141

    
2142
      if nresult.fail_msg or not nresult.payload:
2143
        node_files = None
2144
      else:
2145
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2146

    
2147
      test = not (node_files and isinstance(node_files, dict))
2148
      errorif(test, cls.ENODEFILECHECK, node.name,
2149
              "Node did not return file checksum data")
2150
      if test:
2151
        ignore_nodes.add(node.name)
2152
        continue
2153

    
2154
      # Build per-checksum mapping from filename to nodes having it
2155
      for (filename, checksum) in node_files.items():
2156
        assert filename in nodefiles
2157
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2158

    
2159
    for (filename, checksums) in fileinfo.items():
2160
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2161

    
2162
      # Nodes having the file
2163
      with_file = frozenset(node_name
2164
                            for nodes in fileinfo[filename].values()
2165
                            for node_name in nodes) - ignore_nodes
2166

    
2167
      expected_nodes = nodefiles[filename] - ignore_nodes
2168

    
2169
      # Nodes missing file
2170
      missing_file = expected_nodes - with_file
2171

    
2172
      if filename in files_opt:
2173
        # All or no nodes
2174
        errorif(missing_file and missing_file != expected_nodes,
2175
                cls.ECLUSTERFILECHECK, None,
2176
                "File %s is optional, but it must exist on all or no"
2177
                " nodes (not found on %s)",
2178
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2179
      else:
2180
        # Non-optional files
2181
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2182
                "File %s is missing from node(s) %s", filename,
2183
                utils.CommaJoin(utils.NiceSort(missing_file)))
2184

    
2185
        # Warn if a node has a file it shouldn't
2186
        unexpected = with_file - expected_nodes
2187
        errorif(unexpected,
2188
                cls.ECLUSTERFILECHECK, None,
2189
                "File %s should not exist on node(s) %s",
2190
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2191

    
2192
      # See if there are multiple versions of the file
2193
      test = len(checksums) > 1
2194
      if test:
2195
        variants = ["variant %s on %s" %
2196
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2197
                    for (idx, (checksum, nodes)) in
2198
                      enumerate(sorted(checksums.items()))]
2199
      else:
2200
        variants = []
2201

    
2202
      errorif(test, cls.ECLUSTERFILECHECK, None,
2203
              "File %s found with %s different checksums (%s)",
2204
              filename, len(checksums), "; ".join(variants))
2205

    
2206
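  # Illustration (hypothetical data): fileinfo maps each filename to a
  # {checksum: set(node names)} dict, e.g.
  #   {"/etc/hosts": {"0a1b...": set(["node1"]), "9f8e...": set(["node2"])}}
  # which _VerifyFiles above reports as two variants of the same file.
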
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2207
                      drbd_map):
2208
    """Verifies and the node DRBD status.
2209

2210
    @type ninfo: L{objects.Node}
2211
    @param ninfo: the node to check
2212
    @param nresult: the remote results for the node
2213
    @param instanceinfo: the dict of instances
2214
    @param drbd_helper: the configured DRBD usermode helper
2215
    @param drbd_map: the DRBD map as returned by
2216
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2217

2218
    """
2219
    node = ninfo.name
2220
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2221

    
2222
    if drbd_helper:
2223
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2224
      test = (helper_result is None)
2225
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
2226
               "no drbd usermode helper returned")
2227
      if helper_result:
2228
        status, payload = helper_result
2229
        test = not status
2230
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2231
                 "drbd usermode helper check unsuccessful: %s", payload)
2232
        test = status and (payload != drbd_helper)
2233
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2234
                 "wrong drbd usermode helper: %s", payload)
2235

    
2236
    # compute the DRBD minors
2237
    node_drbd = {}
2238
    for minor, instance in drbd_map[node].items():
2239
      test = instance not in instanceinfo
2240
      _ErrorIf(test, self.ECLUSTERCFG, None,
2241
               "ghost instance '%s' in temporary DRBD map", instance)
2242
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
2245
      if test:
2246
        node_drbd[minor] = (instance, False)
2247
      else:
2248
        instance = instanceinfo[instance]
2249
        node_drbd[minor] = (instance.name, instance.admin_up)
2250

    
2251
    # and now check them
2252
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2253
    test = not isinstance(used_minors, (tuple, list))
2254
    _ErrorIf(test, self.ENODEDRBD, node,
2255
             "cannot parse drbd status file: %s", str(used_minors))
2256
    if test:
2257
      # we cannot check drbd status
2258
      return
2259

    
2260
    for minor, (iname, must_exist) in node_drbd.items():
2261
      test = minor not in used_minors and must_exist
2262
      _ErrorIf(test, self.ENODEDRBD, node,
2263
               "drbd minor %d of instance %s is not active", minor, iname)
2264
    for minor in used_minors:
2265
      test = minor not in node_drbd
2266
      _ErrorIf(test, self.ENODEDRBD, node,
2267
               "unallocated drbd minor %d is in use", minor)
2268

    
2269
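  # Illustration (hypothetical values): drbd_map[node] is a
  # {minor: instance_name} dict such as {0: "inst1.example.com"}, which
  # _VerifyNodeDrbd above turns into
  #   node_drbd = {0: ("inst1.example.com", True)}
  # and compares against the minors the node reported in NV_DRBDLIST.
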
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2270
    """Builds the node OS structures.
2271

2272
    @type ninfo: L{objects.Node}
2273
    @param ninfo: the node to check
2274
    @param nresult: the remote results for the node
2275
    @param nimg: the node image object
2276

2277
    """
2278
    node = ninfo.name
2279
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2280

    
2281
    remote_os = nresult.get(constants.NV_OSLIST, None)
2282
    test = (not isinstance(remote_os, list) or
2283
            not compat.all(isinstance(v, list) and len(v) == 7
2284
                           for v in remote_os))
2285

    
2286
    _ErrorIf(test, self.ENODEOS, node,
2287
             "node hasn't returned valid OS data")
2288

    
2289
    nimg.os_fail = test
2290

    
2291
    if test:
2292
      return
2293

    
2294
    os_dict = {}
2295

    
2296
    for (name, os_path, status, diagnose,
2297
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2298

    
2299
      if name not in os_dict:
2300
        os_dict[name] = []
2301

    
2302
      # parameters is a list of lists instead of list of tuples due to
2303
      # JSON lacking a real tuple type, fix it:
2304
      parameters = [tuple(v) for v in parameters]
2305
      os_dict[name].append((os_path, status, diagnose,
2306
                            set(variants), set(parameters), set(api_ver)))
2307

    
2308
    nimg.oslist = os_dict
2309

    
2310
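  # Illustration (hypothetical values): each NV_OSLIST entry is a 7-element
  # list such as
  #   ["debootstrap", "/srv/ganeti/os/debootstrap", True, "", ["default"], [],
  #    [20]]
  # which ends up in nimg.oslist["debootstrap"] as a
  # (path, status, diagnose, variants, parameters, api_versions) tuple with
  # the last three converted to sets.
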
  def _VerifyNodeOS(self, ninfo, nimg, base):
2311
    """Verifies the node OS list.
2312

2313
    @type ninfo: L{objects.Node}
2314
    @param ninfo: the node to check
2315
    @param nimg: the node image object
2316
    @param base: the 'template' node we match against (e.g. from the master)
2317

2318
    """
2319
    node = ninfo.name
2320
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2321

    
2322
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2323

    
2324
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2325
    for os_name, os_data in nimg.oslist.items():
2326
      assert os_data, "Empty OS status for OS %s?!" % os_name
2327
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2328
      _ErrorIf(not f_status, self.ENODEOS, node,
2329
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2330
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2331
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2332
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2333
      # comparisons with the 'base' image
2334
      test = os_name not in base.oslist
2335
      _ErrorIf(test, self.ENODEOS, node,
2336
               "Extra OS %s not present on reference node (%s)",
2337
               os_name, base.name)
2338
      if test:
2339
        continue
2340
      assert base.oslist[os_name], "Base node has empty OS status?"
2341
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2342
      if not b_status:
2343
        # base OS is invalid, skipping
2344
        continue
2345
      for kind, a, b in [("API version", f_api, b_api),
2346
                         ("variants list", f_var, b_var),
2347
                         ("parameters", beautify_params(f_param),
2348
                          beautify_params(b_param))]:
2349
        _ErrorIf(a != b, self.ENODEOS, node,
2350
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2351
                 kind, os_name, base.name,
2352
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2353

    
2354
    # check any missing OSes
2355
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2356
    _ErrorIf(missing, self.ENODEOS, node,
2357
             "OSes present on reference node %s but missing on this node: %s",
2358
             base.name, utils.CommaJoin(missing))
2359

    
2360
  def _VerifyOob(self, ninfo, nresult):
2361
    """Verifies out of band functionality of a node.
2362

2363
    @type ninfo: L{objects.Node}
2364
    @param ninfo: the node to check
2365
    @param nresult: the remote results for the node
2366

2367
    """
2368
    node = ninfo.name
2369
    # We just have to verify the paths on master and/or master candidates
2370
    # as the oob helper is invoked on the master
2371
    if ((ninfo.master_candidate or ninfo.master_capable) and
2372
        constants.NV_OOB_PATHS in nresult):
2373
      for path_result in nresult[constants.NV_OOB_PATHS]:
2374
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2375

    
2376
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2377
    """Verifies and updates the node volume data.
2378

2379
    This function will update a L{NodeImage}'s internal structures
2380
    with data from the remote call.
2381

2382
    @type ninfo: L{objects.Node}
2383
    @param ninfo: the node to check
2384
    @param nresult: the remote results for the node
2385
    @param nimg: the node image object
2386
    @param vg_name: the configured VG name
2387

2388
    """
2389
    node = ninfo.name
2390
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2391

    
2392
    nimg.lvm_fail = True
2393
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2394
    if vg_name is None:
2395
      pass
2396
    elif isinstance(lvdata, basestring):
2397
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2398
               utils.SafeEncode(lvdata))
2399
    elif not isinstance(lvdata, dict):
2400
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2401
    else:
2402
      nimg.volumes = lvdata
2403
      nimg.lvm_fail = False
2404

    
2405
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2406
    """Verifies and updates the node instance list.
2407

2408
    If the listing was successful, then updates this node's instance
2409
    list. Otherwise, it marks the RPC call as failed for the instance
2410
    list key.
2411

2412
    @type ninfo: L{objects.Node}
2413
    @param ninfo: the node to check
2414
    @param nresult: the remote results for the node
2415
    @param nimg: the node image object
2416

2417
    """
2418
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2419
    test = not isinstance(idata, list)
2420
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2421
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2422
    if test:
2423
      nimg.hyp_fail = True
2424
    else:
2425
      nimg.instances = idata
2426

    
2427
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2428
    """Verifies and computes a node information map
2429

2430
    @type ninfo: L{objects.Node}
2431
    @param ninfo: the node to check
2432
    @param nresult: the remote results for the node
2433
    @param nimg: the node image object
2434
    @param vg_name: the configured VG name
2435

2436
    """
2437
    node = ninfo.name
2438
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2439

    
2440
    # try to read free memory (from the hypervisor)
2441
    hv_info = nresult.get(constants.NV_HVINFO, None)
2442
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2443
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2444
    if not test:
2445
      try:
2446
        nimg.mfree = int(hv_info["memory_free"])
2447
      except (ValueError, TypeError):
2448
        _ErrorIf(True, self.ENODERPC, node,
2449
                 "node returned invalid nodeinfo, check hypervisor")
2450

    
2451
    # FIXME: devise a free space model for file based instances as well
2452
    if vg_name is not None:
2453
      test = (constants.NV_VGLIST not in nresult or
2454
              vg_name not in nresult[constants.NV_VGLIST])
2455
      _ErrorIf(test, self.ENODELVM, node,
2456
               "node didn't return data for the volume group '%s'"
2457
               " - it is either missing or broken", vg_name)
2458
      if not test:
2459
        try:
2460
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2461
        except (ValueError, TypeError):
2462
          _ErrorIf(True, self.ENODERPC, node,
2463
                   "node returned invalid LVM info, check LVM status")
2464

    
2465
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2466
    """Gets per-disk status information for all instances.
2467

2468
    @type nodelist: list of strings
2469
    @param nodelist: Node names
2470
    @type node_image: dict of (name, L{objects.Node})
2471
    @param node_image: Node objects
2472
    @type instanceinfo: dict of (name, L{objects.Instance})
2473
    @param instanceinfo: Instance objects
2474
    @rtype: {instance: {node: [(success, payload)]}}
2475
    @return: a dictionary of per-instance dictionaries with nodes as
2476
        keys and disk information as values; the disk information is a
2477
        list of tuples (success, payload)
2478

2479
    """
2480
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2481

    
2482
    node_disks = {}
2483
    node_disks_devonly = {}
2484
    diskless_instances = set()
2485
    diskless = constants.DT_DISKLESS
2486

    
2487
    for nname in nodelist:
2488
      node_instances = list(itertools.chain(node_image[nname].pinst,
2489
                                            node_image[nname].sinst))
2490
      diskless_instances.update(inst for inst in node_instances
2491
                                if instanceinfo[inst].disk_template == diskless)
2492
      disks = [(inst, disk)
2493
               for inst in node_instances
2494
               for disk in instanceinfo[inst].disks]
2495

    
2496
      if not disks:
2497
        # No need to collect data
2498
        continue
2499

    
2500
      node_disks[nname] = disks
2501

    
2502
      # Creating copies as SetDiskID below will modify the objects and that can
2503
      # lead to incorrect data returned from nodes
2504
      devonly = [dev.Copy() for (_, dev) in disks]
2505

    
2506
      for dev in devonly:
2507
        self.cfg.SetDiskID(dev, nname)
2508

    
2509
      node_disks_devonly[nname] = devonly
2510

    
2511
    assert len(node_disks) == len(node_disks_devonly)
2512

    
2513
    # Collect data from all nodes with disks
2514
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2515
                                                          node_disks_devonly)
2516

    
2517
    assert len(result) == len(node_disks)
2518

    
2519
    instdisk = {}
2520

    
2521
    for (nname, nres) in result.items():
2522
      disks = node_disks[nname]
2523

    
2524
      if nres.offline:
2525
        # No data from this node
2526
        data = len(disks) * [(False, "node offline")]
2527
      else:
2528
        msg = nres.fail_msg
2529
        _ErrorIf(msg, self.ENODERPC, nname,
2530
                 "while getting disk information: %s", msg)
2531
        if msg:
2532
          # No data from this node
2533
          data = len(disks) * [(False, msg)]
2534
        else:
2535
          data = []
2536
          for idx, i in enumerate(nres.payload):
2537
            if isinstance(i, (tuple, list)) and len(i) == 2:
2538
              data.append(i)
2539
            else:
2540
              logging.warning("Invalid result from node %s, entry %d: %s",
2541
                              nname, idx, i)
2542
              data.append((False, "Invalid result from the remote node"))
2543

    
2544
      for ((inst, _), status) in zip(disks, data):
2545
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2546

    
2547
    # Add empty entries for diskless instances.
2548
    for inst in diskless_instances:
2549
      assert inst not in instdisk
2550
      instdisk[inst] = {}
2551

    
2552
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2553
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2554
                      compat.all(isinstance(s, (tuple, list)) and
2555
                                 len(s) == 2 for s in statuses)
2556
                      for inst, nnames in instdisk.items()
2557
                      for nname, statuses in nnames.items())
2558
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2559

    
2560
    return instdisk
2561

    
2562
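  # Illustration (hypothetical values): the returned instdisk mapping looks
  # like
  #   {"inst1.example.com": {"node1.example.com": [(True, status0),
  #                                                (True, status1)]}}
  # i.e. one (success, payload) tuple per disk of the instance on that node.
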
  @staticmethod
2563
  def _SshNodeSelector(group_uuid, all_nodes):
2564
    """Create endless iterators for all potential SSH check hosts.
2565

2566
    """
2567
    nodes = [node for node in all_nodes
2568
             if (node.group != group_uuid and
2569
                 not node.offline)]
2570
    keyfunc = operator.attrgetter("group")
2571

    
2572
    return map(itertools.cycle,
2573
               [sorted(map(operator.attrgetter("name"), names))
2574
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2575
                                                  keyfunc)])
2576

    
2577
  @classmethod
2578
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2579
    """Choose which nodes should talk to which other nodes.
2580

2581
    We will make nodes contact all nodes in their group, and one node from
2582
    every other group.
2583

2584
    @warning: This algorithm has a known issue if one node group is much
2585
      smaller than others (e.g. just one node). In such a case all other
2586
      nodes will talk to the single node.
2587

2588
    """
2589
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2590
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2591

    
2592
    return (online_nodes,
2593
            dict((name, sorted([i.next() for i in sel]))
2594
                 for name in online_nodes))
2595

    
2596
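  # Illustration (hypothetical names): for a group with online nodes node1
  # and node2 and one other group, this returns roughly
  #   (["node1", "node2"],
  #    {"node1": ["othergroup-a"], "node2": ["othergroup-b"]})
  # i.e. every online node checks the nodes of its own group plus one node
  # picked round-robin from each other group.
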
  def BuildHooksEnv(self):
2597
    """Build hooks env.
2598

2599
    Cluster-Verify hooks are run only in the post phase; a failed hook gets
    its output logged in the verify output and makes the verification fail.
2601

2602
    """
2603
    env = {
2604
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2605
      }
2606

    
2607
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2608
               for node in self.my_node_info.values())
2609

    
2610
    return env
2611

    
2612
  def BuildHooksNodes(self):
2613
    """Build hooks nodes.
2614

2615
    """
2616
    return ([], self.my_node_names)
2617

    
2618
  def Exec(self, feedback_fn):
2619
    """Verify integrity of the node group, performing various test on nodes.
2620

2621
    """
2622
    # This method has too many local variables. pylint: disable=R0914
2623
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2624

    
2625
    if not self.my_node_names:
2626
      # empty node group
2627
      feedback_fn("* Empty node group, skipping verification")
2628
      return True
2629

    
2630
    self.bad = False
2631
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2632
    verbose = self.op.verbose
2633
    self._feedback_fn = feedback_fn
2634

    
2635
    vg_name = self.cfg.GetVGName()
2636
    drbd_helper = self.cfg.GetDRBDHelper()
2637
    cluster = self.cfg.GetClusterInfo()
2638
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2639
    hypervisors = cluster.enabled_hypervisors
2640
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2641

    
2642
    i_non_redundant = [] # Non redundant instances
2643
    i_non_a_balanced = [] # Non auto-balanced instances
2644
    n_offline = 0 # Count of offline nodes
2645
    n_drained = 0 # Count of nodes being drained
2646
    node_vol_should = {}
2647

    
2648
    # FIXME: verify OS list
2649

    
2650
    # File verification
2651
    filemap = _ComputeAncillaryFiles(cluster, False)
2652

    
2653
    # do local checksums
2654
    master_node = self.master_node = self.cfg.GetMasterNode()
2655
    master_ip = self.cfg.GetMasterIP()
2656

    
2657
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2658

    
2659
    node_verify_param = {
2660
      constants.NV_FILELIST:
2661
        utils.UniqueSequence(filename
2662
                             for files in filemap
2663
                             for filename in files),
2664
      constants.NV_NODELIST:
2665
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2666
                                  self.all_node_info.values()),
2667
      constants.NV_HYPERVISOR: hypervisors,
2668
      constants.NV_HVPARAMS:
2669
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2670
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2671
                                 for node in node_data_list
2672
                                 if not node.offline],
2673
      constants.NV_INSTANCELIST: hypervisors,
2674
      constants.NV_VERSION: None,
2675
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2676
      constants.NV_NODESETUP: None,
2677
      constants.NV_TIME: None,
2678
      constants.NV_MASTERIP: (master_node, master_ip),
2679
      constants.NV_OSLIST: None,
2680
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2681
      }
2682

    
2683
    if vg_name is not None:
2684
      node_verify_param[constants.NV_VGLIST] = None
2685
      node_verify_param[constants.NV_LVLIST] = vg_name
2686
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2687
      node_verify_param[constants.NV_DRBDLIST] = None
2688

    
2689
    if drbd_helper:
2690
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2691

    
2692
    # bridge checks
2693
    # FIXME: this needs to be changed per node-group, not cluster-wide
2694
    bridges = set()
2695
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2696
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2697
      bridges.add(default_nicpp[constants.NIC_LINK])
2698
    for instance in self.my_inst_info.values():
2699
      for nic in instance.nics:
2700
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2701
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2702
          bridges.add(full_nic[constants.NIC_LINK])
2703

    
2704
    if bridges:
2705
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2706

    
2707
    # Build our expected cluster state
2708
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2709
                                                 name=node.name,
2710
                                                 vm_capable=node.vm_capable))
2711
                      for node in node_data_list)
2712

    
2713
    # Gather OOB paths
2714
    oob_paths = []
2715
    for node in self.all_node_info.values():
2716
      path = _SupportsOob(self.cfg, node)
2717
      if path and path not in oob_paths:
2718
        oob_paths.append(path)
2719

    
2720
    if oob_paths:
2721
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2722

    
2723
    for instance in self.my_inst_names:
2724
      inst_config = self.my_inst_info[instance]
2725

    
2726
      for nname in inst_config.all_nodes:
2727
        if nname not in node_image:
2728
          gnode = self.NodeImage(name=nname)
2729
          gnode.ghost = (nname not in self.all_node_info)
2730
          node_image[nname] = gnode
2731

    
2732
      inst_config.MapLVsByNode(node_vol_should)
2733

    
2734
      pnode = inst_config.primary_node
2735
      node_image[pnode].pinst.append(instance)
2736

    
2737
      for snode in inst_config.secondary_nodes:
2738
        nimg = node_image[snode]
2739
        nimg.sinst.append(instance)
2740
        if pnode not in nimg.sbp:
2741
          nimg.sbp[pnode] = []
2742
        nimg.sbp[pnode].append(instance)
2743

    
2744
    # At this point, we have the in-memory data structures complete,
2745
    # except for the runtime information, which we'll gather next
2746

    
2747
    # Due to the way our RPC system works, exact response times cannot be
2748
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2749
    # time before and after executing the request, we can at least have a time
2750
    # window.
2751
    nvinfo_starttime = time.time()
2752
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2753
                                           node_verify_param,
2754
                                           self.cfg.GetClusterName())
2755
    nvinfo_endtime = time.time()
2756

    
2757
    if self.extra_lv_nodes and vg_name is not None:
2758
      extra_lv_nvinfo = \
2759
          self.rpc.call_node_verify(self.extra_lv_nodes,
2760
                                    {constants.NV_LVLIST: vg_name},
2761
                                    self.cfg.GetClusterName())
2762
    else:
2763
      extra_lv_nvinfo = {}
2764

    
2765
    all_drbd_map = self.cfg.ComputeDRBDMap()
2766

    
2767
    feedback_fn("* Gathering disk information (%s nodes)" %
2768
                len(self.my_node_names))
2769
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2770
                                     self.my_inst_info)
2771

    
2772
    feedback_fn("* Verifying configuration file consistency")
2773

    
2774
    # If not all nodes are being checked, we need to make sure the master node
2775
    # and a non-checked vm_capable node are in the list.
2776
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2777
    if absent_nodes:
2778
      vf_nvinfo = all_nvinfo.copy()
2779
      vf_node_info = list(self.my_node_info.values())
2780
      additional_nodes = []
2781
      if master_node not in self.my_node_info:
2782
        additional_nodes.append(master_node)
2783
        vf_node_info.append(self.all_node_info[master_node])
2784
      # Add the first vm_capable node we find which is not included
2785
      for node in absent_nodes:
2786
        nodeinfo = self.all_node_info[node]
2787
        if nodeinfo.vm_capable and not nodeinfo.offline:
2788
          additional_nodes.append(node)
2789
          vf_node_info.append(self.all_node_info[node])
2790
          break
2791
      key = constants.NV_FILELIST
2792
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2793
                                                 {key: node_verify_param[key]},
2794
                                                 self.cfg.GetClusterName()))
2795
    else:
2796
      vf_nvinfo = all_nvinfo
2797
      vf_node_info = self.my_node_info.values()
2798

    
2799
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2800

    
2801
    feedback_fn("* Verifying node status")
2802

    
2803
    refos_img = None
2804

    
2805
    for node_i in node_data_list:
2806
      node = node_i.name
2807
      nimg = node_image[node]
2808

    
2809
      if node_i.offline:
2810
        if verbose:
2811
          feedback_fn("* Skipping offline node %s" % (node,))
2812
        n_offline += 1
2813
        continue
2814

    
2815
      if node == master_node:
2816
        ntype = "master"
2817
      elif node_i.master_candidate:
2818
        ntype = "master candidate"
2819
      elif node_i.drained:
2820
        ntype = "drained"
2821
        n_drained += 1
2822
      else:
2823
        ntype = "regular"
2824
      if verbose:
2825
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2826

    
2827
      msg = all_nvinfo[node].fail_msg
2828
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2829
      if msg:
2830
        nimg.rpc_fail = True
2831
        continue
2832

    
2833
      nresult = all_nvinfo[node].payload
2834

    
2835
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2836
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2837
      self._VerifyNodeNetwork(node_i, nresult)
2838
      self._VerifyOob(node_i, nresult)
2839

    
2840
      if nimg.vm_capable:
2841
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2842
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2843
                             all_drbd_map)
2844

    
2845
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2846
        self._UpdateNodeInstances(node_i, nresult, nimg)
2847
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2848
        self._UpdateNodeOS(node_i, nresult, nimg)
2849

    
2850
        if not nimg.os_fail:
2851
          if refos_img is None:
2852
            refos_img = nimg
2853
          self._VerifyNodeOS(node_i, nimg, refos_img)
2854
        self._VerifyNodeBridges(node_i, nresult, bridges)
2855

    
2856
        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
2859
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2860

    
2861
        for inst in non_primary_inst:
2862
          test = inst in self.all_inst_info
2863
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2864
                   "instance should not run on node %s", node_i.name)
2865
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2866
                   "node is running unknown instance %s", inst)
2867

    
2868
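    # Fold in the LV information that was gathered separately (presumably for
    # nodes whose volume data is not part of the main verify results) so the
    # later orphan-volume check sees up-to-date volume lists.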
    for node, result in extra_lv_nvinfo.items():
2869
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2870
                              node_image[node], vg_name)
2871

    
2872
    feedback_fn("* Verifying instance status")
2873
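    # For every instance owned by this group: run the per-instance checks,
    # make sure its primary and secondary nodes are reachable and not
    # offline, ghost or non-vm_capable, warn about split node groups, and
    # collect non-redundant / non-auto-balanced instances for the summary.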
    for instance in self.my_inst_names:
2874
      if verbose:
2875
        feedback_fn("* Verifying instance %s" % instance)
2876
      inst_config = self.my_inst_info[instance]
2877
      self._VerifyInstance(instance, inst_config, node_image,
2878
                           instdisk[instance])
2879
      inst_nodes_offline = []
2880

    
2881
      pnode = inst_config.primary_node
2882
      pnode_img = node_image[pnode]
2883
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2884
               self.ENODERPC, pnode, "instance %s, connection to"
2885
               " primary node failed", instance)
2886

    
2887
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2888
               self.EINSTANCEBADNODE, instance,
2889
               "instance is marked as running and lives on offline node %s",
2890
               inst_config.primary_node)
2891

    
2892
      # If the instance is non-redundant we cannot survive losing its primary
2893
      # node, so we are not N+1 compliant. On the other hand we have no disk
2894
      # templates with more than one secondary so that situation is not well
2895
      # supported either.
2896
      # FIXME: does not support file-backed instances
2897
      if not inst_config.secondary_nodes:
2898
        i_non_redundant.append(instance)
2899

    
2900
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2901
               instance, "instance has multiple secondary nodes: %s",
2902
               utils.CommaJoin(inst_config.secondary_nodes),
2903
               code=self.ETYPE_WARNING)
2904

    
2905
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2906
        pnode = inst_config.primary_node
2907
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2908
        instance_groups = {}
2909

    
2910
        for node in instance_nodes:
2911
          instance_groups.setdefault(self.all_node_info[node].group,
2912
                                     []).append(node)
2913

    
2914
        pretty_list = [
2915
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2916
          # Sort so that we always list the primary node first.
2917
          for group, nodes in sorted(instance_groups.items(),
2918
                                     key=lambda (_, nodes): pnode in nodes,
2919
                                     reverse=True)]
2920

    
2921
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2922
                      instance, "instance has primary and secondary nodes in"
2923
                      " different groups: %s", utils.CommaJoin(pretty_list),
2924
                      code=self.ETYPE_WARNING)
2925

    
2926
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2927
        i_non_a_balanced.append(instance)
2928

    
2929
      for snode in inst_config.secondary_nodes:
2930
        s_img = node_image[snode]
2931
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2932
                 "instance %s, connection to secondary node failed", instance)
2933

    
2934
        if s_img.offline:
2935
          inst_nodes_offline.append(snode)
2936

    
2937
      # warn that the instance lives on offline nodes
2938
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2939
               "instance has offline secondary node(s) %s",
2940
               utils.CommaJoin(inst_nodes_offline))
2941
      # ... or ghost/non-vm_capable nodes
2942
      for node in inst_config.all_nodes:
2943
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2944
                 "instance lives on ghost node %s", node)
2945
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2946
                 instance, "instance lives on non-vm_capable node %s", node)
2947

    
2948
    feedback_fn("* Verifying orphan volumes")
2949
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2950

    
2951
    # We will get spurious "unknown volume" warnings if any node of this group
2952
    # is secondary for an instance whose primary is in another group. To avoid
2953
    # them, we find these instances and add their volumes to node_vol_should.
2954
    for inst in self.all_inst_info.values():
2955
      for secondary in inst.secondary_nodes:
2956
        if (secondary in self.my_node_info
2957
            and inst.name not in self.my_inst_info):
2958
          inst.MapLVsByNode(node_vol_should)
2959
          break
2960

    
2961
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2962

    
2963
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2964
      feedback_fn("* Verifying N+1 Memory redundancy")
2965
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2966

    
2967
    feedback_fn("* Other Notes")
2968
    if i_non_redundant:
2969
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2970
                  % len(i_non_redundant))
2971

    
2972
    if i_non_a_balanced:
2973
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2974
                  % len(i_non_a_balanced))
2975

    
2976
    if n_offline:
2977
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2978

    
2979
    if n_drained:
2980
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2981

    
2982
    return not self.bad
2983

    
2984
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2985
    """Analyze the post-hooks' result
2986

2987
    This method analyses the hook result, handles it, and sends some
2988
    nicely-formatted feedback back to the user.
2989

2990
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2991
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2992
    @param hooks_results: the results of the multi-node hooks rpc call
2993
    @param feedback_fn: function used to send feedback back to the caller
2994
    @param lu_result: previous Exec result
2995
    @return: the new Exec result, based on the previous result
2996
        and hook results
2997

2998
    """
2999
    # We only really run POST phase hooks, only for non-empty groups,
3000
    # and are only interested in their results
3001
    if not self.my_node_names:
3002
      # empty node group
3003
      pass
3004
    elif phase == constants.HOOKS_PHASE_POST:
3005
      # Used to change hooks' output to proper indentation
3006
      feedback_fn("* Hooks Results")
3007
      assert hooks_results, "invalid result from hooks"
3008

    
3009
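      # Report a communication failure for every online node whose hooks RPC
      # failed, then flag each script that returned HKR_FAIL and echo its
      # re-indented output; any such failure downgrades lu_result.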
      for node_name in hooks_results:
3010
        res = hooks_results[node_name]
3011
        msg = res.fail_msg
3012
        test = msg and not res.offline
3013
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
3014
                      "Communication failure in hooks execution: %s", msg)
3015
        if res.offline or msg:
3016
          # No need to investigate payload if node is offline or gave
3017
          # an error.
3018
          continue
3019
        for script, hkr, output in res.payload:
3020
          test = hkr == constants.HKR_FAIL
3021
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
3022
                        "Script %s failed, output:", script)
3023
          if test:
3024
            output = self._HOOKS_INDENT_RE.sub("      ", output)
3025
            feedback_fn("%s" % output)
3026
            lu_result = False
3027

    
3028
    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
3032
  """Verifies the cluster disks status.
3033

3034
  """
3035
  REQ_BGL = False
3036

    
3037
  def ExpandNames(self):
3038
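    # All node group locks are acquired in shared mode; the actual disk
    # verification is delegated to one OpGroupVerifyDisks job per node group
    # (submitted from Exec below).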
    self.share_locks = _ShareAll()
3039
    self.needed_locks = {
3040
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
3041
      }
3042

    
3043
  def Exec(self, feedback_fn):
3044
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3045

    
3046
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3047
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3048
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
3052
  """Verifies the status of all disks in a node group.
3053

3054
  """
3055
  REQ_BGL = False
3056

    
3057
  def ExpandNames(self):
3058
    # Raises errors.OpPrereqError on its own if group can't be found
3059
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3060

    
3061
    self.share_locks = _ShareAll()
3062
    self.needed_locks = {
3063
      locking.LEVEL_INSTANCE: [],
3064
      locking.LEVEL_NODEGROUP: [],
3065
      locking.LEVEL_NODE: [],
3066
      }
3067

    
3068
  def DeclareLocks(self, level):
3069
    if level == locking.LEVEL_INSTANCE:
3070
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3071

    
3072
      # Lock instances optimistically, needs verification once node and group
3073
      # locks have been acquired
3074
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3075
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3076

    
3077
    elif level == locking.LEVEL_NODEGROUP:
3078
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3079

    
3080
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3081
        set([self.group_uuid] +
3082
            # Lock all groups used by instances optimistically; this requires
3083
            # going via the node before it's locked, requiring verification
3084
            # later on
3085
            [group_uuid
3086
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3087
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3088

    
3089
    elif level == locking.LEVEL_NODE:
3090
      # This will only lock the nodes in the group to be verified which contain
3091
      # actual instances
3092
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3093
      self._LockInstancesNodes()
3094

    
3095
      # Lock all nodes in group to be verified
3096
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3097
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3098
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3099

    
3100
  def CheckPrereq(self):
3101
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3102
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3103
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3104

    
3105
    assert self.group_uuid in owned_groups
3106

    
3107
    # Check if locked instances are still correct
3108
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3109

    
3110
    # Get instance information
3111
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3112

    
3113
    # Check if node groups for locked instances are still correct
3114
    for (instance_name, inst) in self.instances.items():
3115
      assert owned_nodes.issuperset(inst.all_nodes), \
3116
        "Instance %s's nodes changed while we kept the lock" % instance_name
3117

    
3118
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3119
                                             owned_groups)
3120

    
3121
      assert self.group_uuid in inst_groups, \
3122
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3123

    
3124
  def Exec(self, feedback_fn):
3125
    """Verify integrity of cluster disks.
3126

3127
    @rtype: tuple of three items
3128
    @return: a tuple of (dict of node-to-node_error, list of instances
3129
        which need activate-disks, dict of instance: (node, volume) for
3130
        missing volumes)
3131

3132
    """
3133
    res_nodes = {}
3134
    res_instances = set()
3135
    res_missing = {}
3136

    
3137
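    # Build a (node, LV name) -> instance map for all running instances, then
    # ask every locked vm_capable node which logical volumes it actually has.
    # LVs that are present but not online mark their instance as needing
    # activate-disks; any (node, LV) pairs left unmatched are reported as
    # missing volumes.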
    nv_dict = _MapInstanceDisksToNodes([inst
3138
                                        for inst in self.instances.values()
3139
                                        if inst.admin_up])
3140

    
3141
    if nv_dict:
3142
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3143
                             set(self.cfg.GetVmCapableNodeList()))
3144

    
3145
      node_lvs = self.rpc.call_lv_list(nodes, [])
3146

    
3147
      for (node, node_res) in node_lvs.items():
3148
        if node_res.offline:
3149
          continue
3150

    
3151
        msg = node_res.fail_msg
3152
        if msg:
3153
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3154
          res_nodes[node] = msg
3155
          continue
3156

    
3157
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3158
          inst = nv_dict.pop((node, lv_name), None)
3159
          if not (lv_online or inst is None):
3160
            res_instances.add(inst)
3161

    
3162
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3163
      # better
3164
      for key, inst in nv_dict.iteritems():
3165
        res_missing.setdefault(inst, []).append(list(key))
3166

    
3167
    return (res_nodes, list(res_instances), res_missing)


class LUClusterRepairDiskSizes(NoHooksLU):
3171
  """Verifies the cluster disks sizes.
3172

3173
  """
3174
  REQ_BGL = False
3175

    
3176
  def ExpandNames(self):
3177
    if self.op.instances:
3178
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3179
      self.needed_locks = {
3180
        locking.LEVEL_NODE: [],
3181
        locking.LEVEL_INSTANCE: self.wanted_names,
3182
        }
3183
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3184
    else:
3185
      self.wanted_names = None
3186
      self.needed_locks = {
3187
        locking.LEVEL_NODE: locking.ALL_SET,
3188
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3189
        }
3190
    self.share_locks = {
3191
      locking.LEVEL_NODE: 1,
3192
      locking.LEVEL_INSTANCE: 0,
3193
      }
3194

    
3195
  def DeclareLocks(self, level):
3196
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3197
      self._LockInstancesNodes(primary_only=True)
3198

    
3199
  def CheckPrereq(self):
3200
    """Check prerequisites.
3201

3202
    This only checks the optional instance list against the existing names.
3203

3204
    """
3205
    if self.wanted_names is None:
3206
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3207

    
3208
    self.wanted_instances = \
3209
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3210

    
3211
  def _EnsureChildSizes(self, disk):
3212
    """Ensure children of the disk have the needed disk size.
3213

3214
    This is valid mainly for DRBD8 and fixes an issue where the
3215
    children have smaller disk size.
3216

3217
    @param disk: an L{ganeti.objects.Disk} object
3218

3219
    """
3220
    if disk.dev_type == constants.LD_DRBD8:
3221
      assert disk.children, "Empty children for DRBD8?"
3222
      fchild = disk.children[0]
3223
      mismatch = fchild.size < disk.size
3224
      if mismatch:
3225
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3226
                     fchild.size, disk.size)
3227
        fchild.size = disk.size
3228

    
3229
      # and we recurse on this child only, not on the metadev
3230
      return self._EnsureChildSizes(fchild) or mismatch
3231
    else:
3232
      return False
3233

    
3234
  def Exec(self, feedback_fn):
3235
    """Verify the size of cluster disks.
3236

3237
    """
3238
    # TODO: check child disks too
3239
    # TODO: check differences in size between primary/secondary nodes
3240
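    # Group disks by primary node so that a single blockdev_getsize RPC per
    # node can be compared against the sizes recorded in the configuration.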
    per_node_disks = {}
3241
    for instance in self.wanted_instances:
3242
      pnode = instance.primary_node
3243
      if pnode not in per_node_disks:
3244
        per_node_disks[pnode] = []
3245
      for idx, disk in enumerate(instance.disks):
3246
        per_node_disks[pnode].append((instance, idx, disk))
3247

    
3248
    changed = []
3249
    for node, dskl in per_node_disks.items():
3250
      newl = [v[2].Copy() for v in dskl]
3251
      for dsk in newl:
3252
        self.cfg.SetDiskID(dsk, node)
3253
      result = self.rpc.call_blockdev_getsize(node, newl)
3254
      if result.fail_msg:
3255
        self.LogWarning("Failure in blockdev_getsize call to node"
3256
                        " %s, ignoring", node)
3257
        continue
3258
      if len(result.payload) != len(dskl):
3259
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3260
                        " result.payload=%s", node, len(dskl), result.payload)
3261
        self.LogWarning("Invalid result from node %s, ignoring node results",
3262
                        node)
3263
        continue
3264
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3265
        if size is None:
3266
          self.LogWarning("Disk %d of instance %s did not return size"
3267
                          " information, ignoring", idx, instance.name)
3268
          continue
3269
        if not isinstance(size, (int, long)):
3270
          self.LogWarning("Disk %d of instance %s did not return valid"
3271
                          " size information, ignoring", idx, instance.name)
3272
          continue
3273
        size = size >> 20
3274
        if size != disk.size:
3275
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3276
                       " correcting: recorded %d, actual %d", idx,
3277
                       instance.name, disk.size, size)
3278
          disk.size = size
3279
          self.cfg.Update(instance, feedback_fn)
3280
          changed.append((instance.name, idx, size))
3281
        if self._EnsureChildSizes(disk):
3282
          self.cfg.Update(instance, feedback_fn)
3283
          changed.append((instance.name, idx, disk.size))
3284
    return changed


class LUClusterRename(LogicalUnit):
3288
  """Rename the cluster.
3289

3290
  """
3291
  HPATH = "cluster-rename"
3292
  HTYPE = constants.HTYPE_CLUSTER
3293

    
3294
  def BuildHooksEnv(self):
3295
    """Build hooks env.
3296

3297
    """
3298
    return {
3299
      "OP_TARGET": self.cfg.GetClusterName(),
3300
      "NEW_NAME": self.op.name,
3301
      }
3302

    
3303
  def BuildHooksNodes(self):
3304
    """Build hooks nodes.
3305

3306
    """
3307
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3308

    
3309
  def CheckPrereq(self):
3310
    """Verify that the passed name is a valid one.
3311

3312
    """
3313
    hostname = netutils.GetHostname(name=self.op.name,
3314
                                    family=self.cfg.GetPrimaryIPFamily())
3315

    
3316
    new_name = hostname.name
3317
    self.ip = new_ip = hostname.ip
3318
    old_name = self.cfg.GetClusterName()
3319
    old_ip = self.cfg.GetMasterIP()
3320
    if new_name == old_name and new_ip == old_ip:
3321
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3322
                                 " cluster has changed",
3323
                                 errors.ECODE_INVAL)
3324
    if new_ip != old_ip:
3325
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3326
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3327
                                   " reachable on the network" %
3328
                                   new_ip, errors.ECODE_NOTUNIQUE)
3329

    
3330
    self.op.name = new_name
3331

    
3332
  def Exec(self, feedback_fn):
3333
    """Rename the cluster.
3334

3335
    """
3336
    clustername = self.op.name
3337
    ip = self.ip
3338

    
3339
    # shutdown the master IP
3340
    master = self.cfg.GetMasterNode()
3341
    result = self.rpc.call_node_deactivate_master_ip(master)
3342
    result.Raise("Could not disable the master role")
3343

    
3344
    try:
3345
      cluster = self.cfg.GetClusterInfo()
3346
      cluster.cluster_name = clustername
3347
      cluster.master_ip = ip
3348
      self.cfg.Update(cluster, feedback_fn)
3349

    
3350
      # update the known hosts file
3351
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3352
      node_list = self.cfg.GetOnlineNodeList()
3353
      try:
3354
        node_list.remove(master)
3355
      except ValueError:
3356
        pass
3357
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3358
    finally:
3359
      result = self.rpc.call_node_activate_master_ip(master)
3360
      msg = result.fail_msg
3361
      if msg:
3362
        self.LogWarning("Could not re-enable the master role on"
3363
                        " the master, please restart manually: %s", msg)
3364

    
3365
    return clustername


class LUClusterSetParams(LogicalUnit):
3369
  """Change the parameters of the cluster.
3370

3371
  """
3372
  HPATH = "cluster-modify"
3373
  HTYPE = constants.HTYPE_CLUSTER
3374
  REQ_BGL = False
3375

    
3376
  def CheckArguments(self):
3377
    """Check parameters
3378

3379
    """
3380
    if self.op.uid_pool:
3381
      uidpool.CheckUidPool(self.op.uid_pool)
3382

    
3383
    if self.op.add_uids:
3384
      uidpool.CheckUidPool(self.op.add_uids)
3385

    
3386
    if self.op.remove_uids:
3387
      uidpool.CheckUidPool(self.op.remove_uids)
3388

    
3389
  def ExpandNames(self):
3390
    # FIXME: in the future maybe other cluster params won't require checking on
3391
    # all nodes to be modified.
3392
    self.needed_locks = {
3393
      locking.LEVEL_NODE: locking.ALL_SET,
3394
    }
3395
    self.share_locks[locking.LEVEL_NODE] = 1
3396

    
3397
  def BuildHooksEnv(self):
3398
    """Build hooks env.
3399

3400
    """
3401
    return {
3402
      "OP_TARGET": self.cfg.GetClusterName(),
3403
      "NEW_VG_NAME": self.op.vg_name,
3404
      }
3405

    
3406
  def BuildHooksNodes(self):
3407
    """Build hooks nodes.
3408

3409
    """
3410
    mn = self.cfg.GetMasterNode()
3411
    return ([mn], [mn])
3412

    
3413
  def CheckPrereq(self):
3414
    """Check prerequisites.
3415

3416
    This checks whether the given params don't conflict and
3417
    if the given volume group is valid.
3418

3419
    """
3420
    if self.op.vg_name is not None and not self.op.vg_name:
3421
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3422
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3423
                                   " instances exist", errors.ECODE_INVAL)
3424

    
3425
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3426
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3427
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3428
                                   " drbd-based instances exist",
3429
                                   errors.ECODE_INVAL)
3430

    
3431
    node_list = self.owned_locks(locking.LEVEL_NODE)
3432

    
3433
    # if vg_name not None, checks given volume group on all nodes
3434
    if self.op.vg_name:
3435
      vglist = self.rpc.call_vg_list(node_list)
3436
      for node in node_list:
3437
        msg = vglist[node].fail_msg
3438
        if msg:
3439
          # ignoring down node
3440
          self.LogWarning("Error while gathering data on node %s"
3441
                          " (ignoring node): %s", node, msg)
3442
          continue
3443
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3444
                                              self.op.vg_name,
3445
                                              constants.MIN_VG_SIZE)
3446
        if vgstatus:
3447
          raise errors.OpPrereqError("Error on node '%s': %s" %
3448
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3449

    
3450
    if self.op.drbd_helper:
3451
      # checks given drbd helper on all nodes
3452
      helpers = self.rpc.call_drbd_helper(node_list)
3453
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3454
        if ninfo.offline:
3455
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3456
          continue
3457
        msg = helpers[node].fail_msg
3458
        if msg:
3459
          raise errors.OpPrereqError("Error checking drbd helper on node"
3460
                                     " '%s': %s" % (node, msg),
3461
                                     errors.ECODE_ENVIRON)
3462
        node_helper = helpers[node].payload
3463
        if node_helper != self.op.drbd_helper:
3464
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3465
                                     (node, node_helper), errors.ECODE_ENVIRON)
3466

    
3467
    self.cluster = cluster = self.cfg.GetClusterInfo()
3468
    # validate params changes
3469
    if self.op.beparams:
3470
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3471
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3472

    
3473
    if self.op.ndparams:
3474
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3475
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3476

    
3477
      # TODO: we need a more general way to handle resetting
3478
      # cluster-level parameters to default values
3479
      if self.new_ndparams["oob_program"] == "":
3480
        self.new_ndparams["oob_program"] = \
3481
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3482

    
3483
    if self.op.nicparams:
3484
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3485
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3486
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3487
      nic_errors = []
3488

    
3489
      # check all instances for consistency
3490
      for instance in self.cfg.GetAllInstancesInfo().values():
3491
        for nic_idx, nic in enumerate(instance.nics):
3492
          params_copy = copy.deepcopy(nic.nicparams)
3493
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3494

    
3495
          # check parameter syntax
3496
          try:
3497
            objects.NIC.CheckParameterSyntax(params_filled)
3498
          except errors.ConfigurationError, err:
3499
            nic_errors.append("Instance %s, nic/%d: %s" %
3500
                              (instance.name, nic_idx, err))
3501

    
3502
          # if we're moving instances to routed, check that they have an ip
3503
          target_mode = params_filled[constants.NIC_MODE]
3504
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3505
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3506
                              " address" % (instance.name, nic_idx))
3507
      if nic_errors:
3508
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3509
                                   "\n".join(nic_errors))
3510

    
3511
    # hypervisor list/parameters
3512
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3513
    if self.op.hvparams:
3514
      for hv_name, hv_dict in self.op.hvparams.items():
3515
        if hv_name not in self.new_hvparams:
3516
          self.new_hvparams[hv_name] = hv_dict
3517
        else:
3518
          self.new_hvparams[hv_name].update(hv_dict)
3519

    
3520
    # os hypervisor parameters
3521
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3522
    if self.op.os_hvp:
3523
      for os_name, hvs in self.op.os_hvp.items():
3524
        if os_name not in self.new_os_hvp:
3525
          self.new_os_hvp[os_name] = hvs
3526
        else:
3527
          for hv_name, hv_dict in hvs.items():
3528
            if hv_name not in self.new_os_hvp[os_name]:
3529
              self.new_os_hvp[os_name][hv_name] = hv_dict
3530
            else:
3531
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3532

    
3533
    # os parameters
3534
    self.new_osp = objects.FillDict(cluster.osparams, {})
3535
    if self.op.osparams:
3536
      for os_name, osp in self.op.osparams.items():
3537
        if os_name not in self.new_osp:
3538
          self.new_osp[os_name] = {}
3539

    
3540
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3541
                                                  use_none=True)
3542

    
3543
        if not self.new_osp[os_name]:
3544
          # we removed all parameters
3545
          del self.new_osp[os_name]
3546
        else:
3547
          # check the parameter validity (remote check)
3548
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3549
                         os_name, self.new_osp[os_name])
3550

    
3551
    # changes to the hypervisor list
3552
    if self.op.enabled_hypervisors is not None:
3553
      self.hv_list = self.op.enabled_hypervisors
3554
      for hv in self.hv_list:
3555
        # if the hypervisor doesn't already exist in the cluster
3556
        # hvparams, we initialize it to empty, and then (in both
3557
        # cases) we make sure to fill the defaults, as we might not
3558
        # have a complete defaults list if the hypervisor wasn't
3559
        # enabled before
3560
        if hv not in new_hvp:
3561
          new_hvp[hv] = {}
3562
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3563
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3564
    else:
3565
      self.hv_list = cluster.enabled_hypervisors
3566

    
3567
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3568
      # either the enabled list has changed, or the parameters have, validate
3569
      for hv_name, hv_params in self.new_hvparams.items():
3570
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3571
            (self.op.enabled_hypervisors and
3572
             hv_name in self.op.enabled_hypervisors)):
3573
          # either this is a new hypervisor, or its parameters have changed
3574
          hv_class = hypervisor.GetHypervisor(hv_name)
3575
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3576
          hv_class.CheckParameterSyntax(hv_params)
3577
          _CheckHVParams(self, node_list, hv_name, hv_params)
3578

    
3579
    if self.op.os_hvp:
3580
      # no need to check any newly-enabled hypervisors, since the
3581
      # defaults have already been checked in the above code-block
3582
      for os_name, os_hvp in self.new_os_hvp.items():
3583
        for hv_name, hv_params in os_hvp.items():
3584
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3585
          # we need to fill in the new os_hvp on top of the actual hv_p
3586
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3587
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3588
          hv_class = hypervisor.GetHypervisor(hv_name)
3589
          hv_class.CheckParameterSyntax(new_osp)
3590
          _CheckHVParams(self, node_list, hv_name, new_osp)
3591

    
3592
    if self.op.default_iallocator:
3593
      alloc_script = utils.FindFile(self.op.default_iallocator,
3594
                                    constants.IALLOCATOR_SEARCH_PATH,
3595
                                    os.path.isfile)
3596
      if alloc_script is None:
3597
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3598
                                   " specified" % self.op.default_iallocator,
3599
                                   errors.ECODE_INVAL)
3600

    
3601
  def Exec(self, feedback_fn):
3602
    """Change the parameters of the cluster.
3603

3604
    """
3605
    if self.op.vg_name is not None:
3606
      new_volume = self.op.vg_name
3607
      if not new_volume:
3608
        new_volume = None
3609
      if new_volume != self.cfg.GetVGName():
3610
        self.cfg.SetVGName(new_volume)
3611
      else:
3612
        feedback_fn("Cluster LVM configuration already in desired"
3613
                    " state, not changing")
3614
    if self.op.drbd_helper is not None:
3615
      new_helper = self.op.drbd_helper
3616
      if not new_helper:
3617
        new_helper = None
3618
      if new_helper != self.cfg.GetDRBDHelper():
3619
        self.cfg.SetDRBDHelper(new_helper)
3620
      else:
3621
        feedback_fn("Cluster DRBD helper already in desired state,"
3622
                    " not changing")
3623
    if self.op.hvparams:
3624
      self.cluster.hvparams = self.new_hvparams
3625
    if self.op.os_hvp:
3626
      self.cluster.os_hvp = self.new_os_hvp
3627
    if self.op.enabled_hypervisors is not None:
3628
      self.cluster.hvparams = self.new_hvparams
3629
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3630
    if self.op.beparams:
3631
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3632
    if self.op.nicparams:
3633
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3634
    if self.op.osparams:
3635
      self.cluster.osparams = self.new_osp
3636
    if self.op.ndparams:
3637
      self.cluster.ndparams = self.new_ndparams
3638

    
3639
    if self.op.candidate_pool_size is not None:
3640
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3641
      # we need to update the pool size here, otherwise the save will fail
3642
      _AdjustCandidatePool(self, [])
3643

    
3644
    if self.op.maintain_node_health is not None:
3645
      self.cluster.maintain_node_health = self.op.maintain_node_health
3646

    
3647
    if self.op.prealloc_wipe_disks is not None:
3648
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3649

    
3650
    if self.op.add_uids is not None:
3651
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3652

    
3653
    if self.op.remove_uids is not None:
3654
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3655

    
3656
    if self.op.uid_pool is not None:
3657
      self.cluster.uid_pool = self.op.uid_pool
3658

    
3659
    if self.op.default_iallocator is not None:
3660
      self.cluster.default_iallocator = self.op.default_iallocator
3661

    
3662
    if self.op.reserved_lvs is not None:
3663
      self.cluster.reserved_lvs = self.op.reserved_lvs
3664

    
3665
    def helper_os(aname, mods, desc):
3666
      desc += " OS list"
3667
      lst = getattr(self.cluster, aname)
3668
      for key, val in mods:
3669
        if key == constants.DDM_ADD:
3670
          if val in lst:
3671
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3672
          else:
3673
            lst.append(val)
3674
        elif key == constants.DDM_REMOVE:
3675
          if val in lst:
3676
            lst.remove(val)
3677
          else:
3678
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3679
        else:
3680
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3681

    
3682
    if self.op.hidden_os:
3683
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3684

    
3685
    if self.op.blacklisted_os:
3686
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3687

    
3688
    if self.op.master_netdev:
3689
      master = self.cfg.GetMasterNode()
3690
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3691
                  self.cluster.master_netdev)
3692
      result = self.rpc.call_node_deactivate_master_ip(master)
3693
      result.Raise("Could not disable the master ip")
3694
      feedback_fn("Changing master_netdev from %s to %s" %
3695
                  (self.cluster.master_netdev, self.op.master_netdev))
3696
      self.cluster.master_netdev = self.op.master_netdev
3697

    
3698
    self.cfg.Update(self.cluster, feedback_fn)
3699

    
3700
    if self.op.master_netdev:
3701
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3702
                  self.op.master_netdev)
3703
      result = self.rpc.call_node_activate_master_ip(master)
3704
      if result.fail_msg:
3705
        self.LogWarning("Could not re-enable the master ip on"
3706
                        " the master, please restart manually: %s",
3707
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
3711
  """Helper for uploading a file and showing warnings.
3712

3713
  """
3714
  if os.path.exists(fname):
3715
    result = lu.rpc.call_upload_file(nodes, fname)
3716
    for to_node, to_result in result.items():
3717
      msg = to_result.fail_msg
3718
      if msg:
3719
        msg = ("Copy of file %s to node %s failed: %s" %
3720
               (fname, to_node, msg))
3721
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
3725
  """Compute files external to Ganeti which need to be consistent.
3726

3727
  @type redist: boolean
3728
  @param redist: Whether to include files which need to be redistributed
3729

3730
  """
3731
  # Compute files for all nodes
3732
  files_all = set([
3733
    constants.SSH_KNOWN_HOSTS_FILE,
3734
    constants.CONFD_HMAC_KEY,
3735
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3736
    constants.RAPI_USERS_FILE,
3737
    ])
3738

    
3739
  if not redist:
3740
    files_all.update(constants.ALL_CERT_FILES)
3741
    files_all.update(ssconf.SimpleStore().GetFileList())
3742
  else:
3743
    # we need to ship at least the RAPI certificate
3744
    files_all.add(constants.RAPI_CERT_FILE)
3745

    
3746
  if cluster.modify_etc_hosts:
3747
    files_all.add(constants.ETC_HOSTS)
3748

    
3749
  # Files which are optional; these must:
3750
  # - be present in one other category as well
3751
  # - either exist or not exist on all nodes of that category (mc, vm all)
3752
  files_opt = set([
3753
    constants.RAPI_USERS_FILE,
3754
    ])
3755

    
3756
  # Files which should only be on master candidates
3757
  files_mc = set()
3758
  if not redist:
3759
    files_mc.add(constants.CLUSTER_CONF_FILE)
3760

    
3761
  # Files which should only be on VM-capable nodes
3762
  files_vm = set(filename
3763
    for hv_name in cluster.enabled_hypervisors
3764
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3765

    
3766
  files_opt |= set(filename
3767
    for hv_name in cluster.enabled_hypervisors
3768
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3769

    
3770
  # Filenames in each category must be unique
3771
  all_files_set = files_all | files_mc | files_vm
3772
  assert (len(all_files_set) ==
3773
          sum(map(len, [files_all, files_mc, files_vm]))), \
3774
         "Found file listed in more than one file list"
3775

    
3776
  # Optional files must be present in one other category
3777
  assert all_files_set.issuperset(files_opt), \
3778
         "Optional file not in a different required list"
3779

    
3780
  return (files_all, files_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3784
  """Distribute additional files which are part of the cluster configuration.
3785

3786
  ConfigWriter takes care of distributing the config and ssconf files, but
3787
  there are more files which should be distributed to all nodes. This function
3788
  makes sure those are copied.
3789

3790
  @param lu: calling logical unit
3791
  @param additional_nodes: list of nodes not in the config to distribute to
3792
  @type additional_vm: boolean
3793
  @param additional_vm: whether the additional nodes are vm-capable or not
3794

3795
  """
3796
  # Gather target nodes
3797
  cluster = lu.cfg.GetClusterInfo()
3798
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3799

    
3800
  online_nodes = lu.cfg.GetOnlineNodeList()
3801
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3802

    
3803
  if additional_nodes is not None:
3804
    online_nodes.extend(additional_nodes)
3805
    if additional_vm:
3806
      vm_nodes.extend(additional_nodes)
3807

    
3808
  # Never distribute to master node
3809
  for nodelist in [online_nodes, vm_nodes]:
3810
    if master_info.name in nodelist:
3811
      nodelist.remove(master_info.name)
3812

    
3813
  # Gather file lists
3814
  (files_all, _, files_mc, files_vm) = \
3815
    _ComputeAncillaryFiles(cluster, True)
3816

    
3817
  # Never re-distribute configuration file from here
3818
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3819
              constants.CLUSTER_CONF_FILE in files_vm)
3820
  assert not files_mc, "Master candidates not handled in this function"
3821

    
3822
  filemap = [
3823
    (online_nodes, files_all),
3824
    (vm_nodes, files_vm),
3825
    ]
3826

    
3827
  # Upload the files
3828
  for (node_list, files) in filemap:
3829
    for fname in files:
3830
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
3834
  """Force the redistribution of cluster configuration.
3835

3836
  This is a very simple LU.
3837

3838
  """
3839
  REQ_BGL = False
3840

    
3841
  def ExpandNames(self):
3842
    self.needed_locks = {
3843
      locking.LEVEL_NODE: locking.ALL_SET,
3844
    }
3845
    self.share_locks[locking.LEVEL_NODE] = 1
3846

    
3847
  def Exec(self, feedback_fn):
3848
    """Redistribute the configuration.
3849

3850
    """
3851
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3852
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
3856
  """Activate the master IP on the master node.
3857

3858
  """
3859
  def Exec(self, feedback_fn):
3860
    """Activate the master IP.
3861

3862
    """
3863
    master = self.cfg.GetMasterNode()
3864
    result = self.rpc.call_node_activate_master_ip(master)
3865
    result.Raise("Could not activate the master IP")


class LUClusterDeactivateMasterIp(NoHooksLU):
3869
  """Deactivate the master IP on the master node.
3870

3871
  """
3872
  def Exec(self, feedback_fn):
3873
    """Deactivate the master IP.
3874

3875
    """
3876
    master = self.cfg.GetMasterNode()
3877
    result = self.rpc.call_node_deactivate_master_ip(master)
3878
    result.Raise("Could not deactivate the master IP")


def _WaitForSync(lu, instance, disks=None, oneshot=False):
3882
  """Sleep and poll for an instance's disk to sync.
3883

3884
  """
3885
  if not instance.disks or disks is not None and not disks:
3886
    return True
3887

    
3888
  disks = _ExpandCheckDisks(instance, disks)
3889

    
3890
  if not oneshot:
3891
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3892

    
3893
  node = instance.primary_node
3894

    
3895
  for dev in disks:
3896
    lu.cfg.SetDiskID(dev, node)
3897

    
3898
  # TODO: Convert to utils.Retry
3899

    
3900
  retries = 0
3901
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3902
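  # Poll the primary node for mirror status until every disk reports the
  # resync as finished (sync_percent is None) or, in oneshot mode, after a
  # single pass; RPC failures are retried up to 10 times with a 6 second
  # pause between attempts.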
  while True:
3903
    max_time = 0
3904
    done = True
3905
    cumul_degraded = False
3906
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3907
    msg = rstats.fail_msg
3908
    if msg:
3909
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3910
      retries += 1
3911
      if retries >= 10:
3912
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3913
                                 " aborting." % node)
3914
      time.sleep(6)
3915
      continue
3916
    rstats = rstats.payload
3917
    retries = 0
3918
    for i, mstat in enumerate(rstats):
3919
      if mstat is None:
3920
        lu.LogWarning("Can't compute data for node %s/%s",
3921
                           node, disks[i].iv_name)
3922
        continue
3923

    
3924
      cumul_degraded = (cumul_degraded or
3925
                        (mstat.is_degraded and mstat.sync_percent is None))
3926
      if mstat.sync_percent is not None:
3927
        done = False
3928
        if mstat.estimated_time is not None:
3929
          rem_time = ("%s remaining (estimated)" %
3930
                      utils.FormatSeconds(mstat.estimated_time))
3931
          max_time = mstat.estimated_time
3932
        else:
3933
          rem_time = "no time estimate"
3934
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3935
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3936

    
3937
    # if we're done but degraded, let's do a few small retries, to
3938
    # make sure we see a stable and not transient situation; therefore
3939
    # we force restart of the loop
3940
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3941
      logging.info("Degraded disks found, %d retries left", degr_retries)
3942
      degr_retries -= 1
3943
      time.sleep(1)
3944
      continue
3945

    
3946
    if done or oneshot:
3947
      break
3948

    
3949
    time.sleep(min(60, max_time))
3950

    
3951
  if done:
3952
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3953
  return not cumul_degraded
3954

    
3955

    
3956
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3957
  """Check that mirrors are not degraded.
3958

3959
  The ldisk parameter, if True, will change the test from the
3960
  is_degraded attribute (which represents overall non-ok status for
3961
  the device(s)) to the ldisk (representing the local storage status).
3962

3963
  """
3964
  lu.cfg.SetDiskID(dev, node)
3965

    
3966
  result = True
3967

    
3968
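  # Only query the node if the device is expected to be assembled there:
  # always on the primary, and on secondaries only for device types that
  # are assembled on secondary nodes as well.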
  if on_primary or dev.AssembleOnSecondary():
3969
    rstats = lu.rpc.call_blockdev_find(node, dev)
3970
    msg = rstats.fail_msg
3971
    if msg:
3972
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3973
      result = False
3974
    elif not rstats.payload:
3975
      lu.LogWarning("Can't find disk on node %s", node)
3976
      result = False
3977
    else:
3978
      if ldisk:
3979
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3980
      else:
3981
        result = result and not rstats.payload.is_degraded
3982

    
3983
  if dev.children:
3984
    for child in dev.children:
3985
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3986

    
3987
  return result
3988

    
3989

    
3990
class LUOobCommand(NoHooksLU):
3991
  """Logical unit for OOB handling.
3992

3993
  """
3994
  REQ_BGL = False
3995
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3996

    
3997
  def ExpandNames(self):
3998
    """Gather locks we need.
3999

4000
    """
4001
    if self.op.node_names:
4002
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4003
      lock_names = self.op.node_names
4004
    else:
4005
      lock_names = locking.ALL_SET
4006

    
4007
    self.needed_locks = {
4008
      locking.LEVEL_NODE: lock_names,
4009
      }
4010

    
4011
  def CheckPrereq(self):
4012
    """Check prerequisites.
4013

4014
    This checks:
4015
     - the node exists in the configuration
4016
     - OOB is supported
4017

4018
    Any errors are signaled by raising errors.OpPrereqError.
4019

4020
    """
4021
    self.nodes = []
4022
    self.master_node = self.cfg.GetMasterNode()
4023

    
4024
    assert self.op.power_delay >= 0.0
4025

    
4026
    if self.op.node_names:
4027
      if (self.op.command in self._SKIP_MASTER and
4028
          self.master_node in self.op.node_names):
4029
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4030
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4031

    
4032
        if master_oob_handler:
4033
          additional_text = ("run '%s %s %s' if you want to operate on the"
4034
                             " master regardless") % (master_oob_handler,
4035
                                                      self.op.command,
4036
                                                      self.master_node)
4037
        else:
4038
          additional_text = "it does not support out-of-band operations"
4039

    
4040
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4041
                                    " allowed for %s; %s") %
4042
                                   (self.master_node, self.op.command,
4043
                                    additional_text), errors.ECODE_INVAL)
4044
    else:
4045
      self.op.node_names = self.cfg.GetNodeList()
4046
      if self.op.command in self._SKIP_MASTER:
4047
        self.op.node_names.remove(self.master_node)
4048

    
4049
    if self.op.command in self._SKIP_MASTER:
4050
      assert self.master_node not in self.op.node_names
4051

    
4052
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4053
      if node is None:
4054
        raise errors.OpPrereqError("Node %s not found" % node_name,
4055
                                   errors.ECODE_NOENT)
4056
      else:
4057
        self.nodes.append(node)
4058

    
4059
      if (not self.op.ignore_status and
4060
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4061
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4062
                                    " not marked offline") % node_name,
4063
                                   errors.ECODE_STATE)
4064

    
4065
  def Exec(self, feedback_fn):
4066
    """Execute OOB and return result if we expect any.
4067

4068
    """
4069
    master_node = self.master_node
4070
    ret = []
4071

    
4072
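    # Run the out-of-band command on each selected node in name order; every
    # invocation is proxied through the master node (call_run_oob), and
    # self.op.power_delay is honoured between consecutive power-on calls.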
    for idx, node in enumerate(utils.NiceSort(self.nodes,
4073
                                              key=lambda node: node.name)):
4074
      node_entry = [(constants.RS_NORMAL, node.name)]
4075
      ret.append(node_entry)
4076

    
4077
      oob_program = _SupportsOob(self.cfg, node)
4078

    
4079
      if not oob_program:
4080
        node_entry.append((constants.RS_UNAVAIL, None))
4081
        continue
4082

    
4083
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
4084
                   self.op.command, oob_program, node.name)
4085
      result = self.rpc.call_run_oob(master_node, oob_program,
4086
                                     self.op.command, node.name,
4087
                                     self.op.timeout)
4088

    
4089
      if result.fail_msg:
4090
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4091
                        node.name, result.fail_msg)
4092
        node_entry.append((constants.RS_NODATA, None))
4093
      else:
4094
        try:
4095
          self._CheckPayload(result)
4096
        except errors.OpExecError, err:
4097
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
4098
                          node.name, err)
4099
          node_entry.append((constants.RS_NODATA, None))
4100
        else:
4101
          if self.op.command == constants.OOB_HEALTH:
4102
            # For health we should log important events
4103
            for item, status in result.payload:
4104
              if status in [constants.OOB_STATUS_WARNING,
4105
                            constants.OOB_STATUS_CRITICAL]:
4106
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
4107
                                item, node.name, status)
4108

    
4109
          if self.op.command == constants.OOB_POWER_ON:
4110
            node.powered = True
4111
          elif self.op.command == constants.OOB_POWER_OFF:
4112
            node.powered = False
4113
          elif self.op.command == constants.OOB_POWER_STATUS:
4114
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4115
            if powered != node.powered:
4116
              logging.warning(("Recorded power state (%s) of node '%s' does not"
4117
                               " match actual power state (%s)"), node.powered,
4118
                              node.name, powered)
4119

    
4120
          # For configuration changing commands we should update the node
4121
          if self.op.command in (constants.OOB_POWER_ON,
4122
                                 constants.OOB_POWER_OFF):
4123
            self.cfg.Update(node, feedback_fn)
4124

    
4125
          node_entry.append((constants.RS_NORMAL, result.payload))
4126

    
4127
          if (self.op.command == constants.OOB_POWER_ON and
4128
              idx < len(self.nodes) - 1):
4129
            time.sleep(self.op.power_delay)
4130

    
4131
    return ret
4132

    
4133
  def _CheckPayload(self, result):
4134
    """Checks if the payload is valid.
4135

4136
    @param result: RPC result
4137
    @raises errors.OpExecError: If payload is not valid
4138

4139
    """
4140
    errs = []
4141
    if self.op.command == constants.OOB_HEALTH:
4142
      if not isinstance(result.payload, list):
4143
        errs.append("command 'health' is expected to return a list but got %s" %
4144
                    type(result.payload))
4145
      else:
4146
        for item, status in result.payload:
4147
          if status not in constants.OOB_STATUSES:
4148
            errs.append("health item '%s' has invalid status '%s'" %
4149
                        (item, status))
4150

    
4151
    if self.op.command == constants.OOB_POWER_STATUS:
4152
      if not isinstance(result.payload, dict):
4153
        errs.append("power-status is expected to return a dict but got %s" %
4154
                    type(result.payload))
4155

    
4156
    if self.op.command in [
4157
        constants.OOB_POWER_ON,
4158
        constants.OOB_POWER_OFF,
4159
        constants.OOB_POWER_CYCLE,
4160
        ]:
4161
      if result.payload is not None:
4162
        errs.append("%s is expected to not return payload but got '%s'" %
4163
                    (self.op.command, result.payload))
4164

    
4165
    if errs:
4166
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4167
                               utils.CommaJoin(errs))


class _OsQuery(_QueryBase):
4171
  FIELDS = query.OS_FIELDS
4172

    
4173
  def ExpandNames(self, lu):
4174
    # Lock all nodes in shared mode
4175
    # Temporary removal of locks, should be reverted later
4176
    # TODO: reintroduce locks when they are lighter-weight
4177
    lu.needed_locks = {}
4178
    #self.share_locks[locking.LEVEL_NODE] = 1
4179
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4180

    
4181
    # The following variables interact with _QueryBase._GetNames
4182
    if self.names:
4183
      self.wanted = self.names
4184
    else:
4185
      self.wanted = locking.ALL_SET
4186

    
4187
    self.do_locking = self.use_locking
4188

    
4189
  def DeclareLocks(self, lu, level):
4190
    pass
4191

    
4192
  @staticmethod
4193
  def _DiagnoseByOS(rlist):
4194
    """Remaps a per-node return list into an a per-os per-node dictionary
4195

4196
    @param rlist: a map with node names as keys and OS objects as values
4197

4198
    @rtype: dict
4199
    @return: a dictionary with osnames as keys and as value another
4200
        map, with nodes as keys and tuples of (path, status, diagnose,
4201
        variants, parameters, api_versions) as values, eg::
4202

4203
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4204
                                     (/srv/..., False, "invalid api")],
4205
                           "node2": [(/srv/..., True, "", [], [])]}
4206
          }
4207

4208
    """
4209
    all_os = {}
4210
    # we build here the list of nodes that didn't fail the RPC (at RPC
4211
    # level), so that nodes with a non-responding node daemon don't
4212
    # make all OSes invalid
4213
    good_nodes = [node_name for node_name in rlist
4214
                  if not rlist[node_name].fail_msg]
4215
    for node_name, nr in rlist.items():
4216
      if nr.fail_msg or not nr.payload:
4217
        continue
4218
      for (name, path, status, diagnose, variants,
4219
           params, api_versions) in nr.payload:
4220
        if name not in all_os:
4221
          # build a list of nodes for this os containing empty lists
4222
          # for each node in node_list
4223
          all_os[name] = {}
4224
          for nname in good_nodes:
4225
            all_os[name][nname] = []
4226
        # convert params from [name, help] to (name, help)
4227
        params = [tuple(v) for v in params]
4228
        all_os[name][node_name].append((path, status, diagnose,
4229
                                        variants, params, api_versions))
4230
    return all_os
4231

    
4232
  def _GetQueryData(self, lu):
4233
    """Computes the list of nodes and their attributes.
4234

4235
    """
4236
    # Locking is not used
4237
    assert not (compat.any(lu.glm.is_owned(level)
4238
                           for level in locking.LEVELS
4239
                           if level != locking.LEVEL_CLUSTER) or
4240
                self.do_locking or self.use_locking)
4241

    
4242
    valid_nodes = [node.name
4243
                   for node in lu.cfg.GetAllNodesInfo().values()
4244
                   if not node.offline and node.vm_capable]
4245
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4246
    cluster = lu.cfg.GetClusterInfo()
4247

    
4248
    data = {}
4249

    
4250
    for (os_name, os_data) in pol.items():
4251
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4252
                          hidden=(os_name in cluster.hidden_os),
4253
                          blacklisted=(os_name in cluster.blacklisted_os))
4254

    
4255
      variants = set()
4256
      parameters = set()
4257
      api_versions = set()
4258

    
4259
      for idx, osl in enumerate(os_data.values()):
4260
        info.valid = bool(info.valid and osl and osl[0][1])
4261
        if not info.valid:
4262
          break
4263

    
4264
        (node_variants, node_params, node_api) = osl[0][3:6]
4265
        if idx == 0:
4266
          # First entry
4267
          variants.update(node_variants)
4268
          parameters.update(node_params)
4269
          api_versions.update(node_api)
4270
        else:
4271
          # Filter out inconsistent values
4272
          variants.intersection_update(node_variants)
4273
          parameters.intersection_update(node_params)
4274
          api_versions.intersection_update(node_api)
4275

    
4276
      info.variants = list(variants)
4277
      info.parameters = list(parameters)
4278
      info.api_versions = list(api_versions)
4279

    
4280
      data[os_name] = info
4281

    
4282
    # Prepare data in requested order
4283
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4284
            if name in data]
4285

    
4286

    
4287
class LUOsDiagnose(NoHooksLU):
4288
  """Logical unit for OS diagnose/query.
4289

4290
  """
4291
  REQ_BGL = False
4292

    
4293
  @staticmethod
4294
  def _BuildFilter(fields, names):
4295
    """Builds a filter for querying OSes.
4296

4297
    """
4298
    name_filter = qlang.MakeSimpleFilter("name", names)
4299

    
4300
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4301
    # respective field is not requested
4302
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4303
                     for fname in ["hidden", "blacklisted"]
4304
                     if fname not in fields]
4305
    if "valid" not in fields:
4306
      status_filter.append([qlang.OP_TRUE, "valid"])
4307

    
4308
    if status_filter:
4309
      status_filter.insert(0, qlang.OP_AND)
4310
    else:
4311
      status_filter = None
4312

    
4313
    if name_filter and status_filter:
4314
      return [qlang.OP_AND, name_filter, status_filter]
4315
    elif name_filter:
4316
      return name_filter
4317
    else:
4318
      return status_filter
4319

    
4320
  def CheckArguments(self):
4321
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4322
                       self.op.output_fields, False)
4323

    
4324
  def ExpandNames(self):
4325
    self.oq.ExpandNames(self)
4326

    
4327
  def Exec(self, feedback_fn):
4328
    return self.oq.OldStyleQuery(self)
4329

    
4330

    
4331
class LUNodeRemove(LogicalUnit):
4332
  """Logical unit for removing a node.
4333

4334
  """
4335
  HPATH = "node-remove"
4336
  HTYPE = constants.HTYPE_NODE
4337

    
4338
  def BuildHooksEnv(self):
4339
    """Build hooks env.
4340

4341
    This doesn't run on the target node in the pre phase as a failed
4342
    node would then be impossible to remove.
4343

4344
    """
4345
    return {
4346
      "OP_TARGET": self.op.node_name,
4347
      "NODE_NAME": self.op.node_name,
4348
      }
4349

    
4350
  def BuildHooksNodes(self):
4351
    """Build hooks nodes.
4352

4353
    """
4354
    all_nodes = self.cfg.GetNodeList()
4355
    try:
4356
      all_nodes.remove(self.op.node_name)
4357
    except ValueError:
4358
      logging.warning("Node '%s', which is about to be removed, was not found"
4359
                      " in the list of all nodes", self.op.node_name)
4360
    return (all_nodes, all_nodes)
4361

    
4362
  def CheckPrereq(self):
4363
    """Check prerequisites.
4364

4365
    This checks:
4366
     - the node exists in the configuration
4367
     - it does not have primary or secondary instances
4368
     - it's not the master
4369

4370
    Any errors are signaled by raising errors.OpPrereqError.
4371

4372
    """
4373
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4374
    node = self.cfg.GetNodeInfo(self.op.node_name)
4375
    assert node is not None
4376

    
4377
    masternode = self.cfg.GetMasterNode()
4378
    if node.name == masternode:
4379
      raise errors.OpPrereqError("Node is the master node, failover to another"
4380
                                 " node is required", errors.ECODE_INVAL)
4381

    
4382
    for instance_name, instance in self.cfg.GetAllInstancesInfo():
4383
      if node.name in instance.all_nodes:
4384
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4385
                                   " please remove first" % instance_name,
4386
                                   errors.ECODE_INVAL)
4387
    self.op.node_name = node.name
4388
    self.node = node
4389

    
4390
  def Exec(self, feedback_fn):
4391
    """Removes the node from the cluster.
4392

4393
    """
4394
    node = self.node
4395
    logging.info("Stopping the node daemon and removing configs from node %s",
4396
                 node.name)
4397

    
4398
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4399

    
4400
    # Promote nodes to master candidate as needed
4401
    _AdjustCandidatePool(self, exceptions=[node.name])
4402
    self.context.RemoveNode(node.name)
4403

    
4404
    # Run post hooks on the node before it's removed
4405
    _RunPostHook(self, node.name)
4406

    
4407
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4408
    msg = result.fail_msg
4409
    if msg:
4410
      self.LogWarning("Errors encountered on the remote node while leaving"
4411
                      " the cluster: %s", msg)
4412

    
4413
    # Remove node from our /etc/hosts
4414
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4415
      master_node = self.cfg.GetMasterNode()
4416
      result = self.rpc.call_etc_hosts_modify(master_node,
4417
                                              constants.ETC_HOSTS_REMOVE,
4418
                                              node.name, None)
4419
      result.Raise("Can't update hosts file with new host data")
4420
      _RedistributeAncillaryFiles(self)
4421

    
4422

    
4423
class _NodeQuery(_QueryBase):
4424
  FIELDS = query.NODE_FIELDS
4425

    
4426
  def ExpandNames(self, lu):
4427
    lu.needed_locks = {}
4428
    lu.share_locks = _ShareAll()
4429

    
4430
    if self.names:
4431
      self.wanted = _GetWantedNodes(lu, self.names)
4432
    else:
4433
      self.wanted = locking.ALL_SET
4434

    
4435
    self.do_locking = (self.use_locking and
4436
                       query.NQ_LIVE in self.requested_data)
4437

    
4438
    if self.do_locking:
4439
      # If any non-static field is requested we need to lock the nodes
4440
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4441

    
4442
  def DeclareLocks(self, lu, level):
4443
    pass
4444

    
4445
  def _GetQueryData(self, lu):
4446
    """Computes the list of nodes and their attributes.
4447

4448
    """
4449
    all_info = lu.cfg.GetAllNodesInfo()
4450

    
4451
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4452

    
4453
    # Gather data as requested
4454
    if query.NQ_LIVE in self.requested_data:
4455
      # filter out non-vm_capable nodes
4456
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4457

    
4458
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4459
                                        lu.cfg.GetHypervisorType())
4460
      live_data = dict((name, nresult.payload)
4461
                       for (name, nresult) in node_data.items()
4462
                       if not nresult.fail_msg and nresult.payload)
4463
    else:
4464
      live_data = None
4465

    
4466
    if query.NQ_INST in self.requested_data:
4467
      node_to_primary = dict([(name, set()) for name in nodenames])
4468
      node_to_secondary = dict([(name, set()) for name in nodenames])
4469

    
4470
      inst_data = lu.cfg.GetAllInstancesInfo()
4471

    
4472
      for inst in inst_data.values():
4473
        if inst.primary_node in node_to_primary:
4474
          node_to_primary[inst.primary_node].add(inst.name)
4475
        for secnode in inst.secondary_nodes:
4476
          if secnode in node_to_secondary:
4477
            node_to_secondary[secnode].add(inst.name)
4478
    else:
4479
      node_to_primary = None
4480
      node_to_secondary = None
4481

    
4482
    if query.NQ_OOB in self.requested_data:
4483
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4484
                         for name, node in all_info.iteritems())
4485
    else:
4486
      oob_support = None
4487

    
4488
    if query.NQ_GROUP in self.requested_data:
4489
      groups = lu.cfg.GetAllNodeGroupsInfo()
4490
    else:
4491
      groups = {}
4492

    
4493
    return query.NodeQueryData([all_info[name] for name in nodenames],
4494
                               live_data, lu.cfg.GetMasterNode(),
4495
                               node_to_primary, node_to_secondary, groups,
4496
                               oob_support, lu.cfg.GetClusterInfo())
4497

    
4498

    
4499
class LUNodeQuery(NoHooksLU):
4500
  """Logical unit for querying nodes.
4501

4502
  """
4503
  # pylint: disable=W0142
4504
  REQ_BGL = False
4505

    
4506
  def CheckArguments(self):
4507
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4508
                         self.op.output_fields, self.op.use_locking)
4509

    
4510
  def ExpandNames(self):
4511
    self.nq.ExpandNames(self)
4512

    
4513
  def Exec(self, feedback_fn):
4514
    return self.nq.OldStyleQuery(self)
4515

    
4516

    
4517
class LUNodeQueryvols(NoHooksLU):
4518
  """Logical unit for getting volumes on node(s).
4519

4520
  """
4521
  REQ_BGL = False
4522
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4523
  _FIELDS_STATIC = utils.FieldSet("node")
4524

    
4525
  def CheckArguments(self):
4526
    _CheckOutputFields(static=self._FIELDS_STATIC,
4527
                       dynamic=self._FIELDS_DYNAMIC,
4528
                       selected=self.op.output_fields)
4529

    
4530
  def ExpandNames(self):
4531
    self.needed_locks = {}
4532
    self.share_locks[locking.LEVEL_NODE] = 1
4533
    if not self.op.nodes:
4534
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4535
    else:
4536
      self.needed_locks[locking.LEVEL_NODE] = \
4537
        _GetWantedNodes(self, self.op.nodes)
4538

    
4539
  def Exec(self, feedback_fn):
4540
    """Computes the list of nodes and their attributes.
4541

4542
    """
4543
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4544
    volumes = self.rpc.call_node_volumes(nodenames)
4545

    
4546
    ilist = self.cfg.GetAllInstancesInfo()
4547
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4548

    
4549
    output = []
4550
    for node in nodenames:
4551
      nresult = volumes[node]
4552
      if nresult.offline:
4553
        continue
4554
      msg = nresult.fail_msg
4555
      if msg:
4556
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4557
        continue
4558

    
4559
      node_vols = sorted(nresult.payload,
4560
                         key=operator.itemgetter("dev"))
4561

    
4562
      for vol in node_vols:
4563
        node_output = []
4564
        for field in self.op.output_fields:
4565
          if field == "node":
4566
            val = node
4567
          elif field == "phys":
4568
            val = vol["dev"]
4569
          elif field == "vg":
4570
            val = vol["vg"]
4571
          elif field == "name":
4572
            val = vol["name"]
4573
          elif field == "size":
4574
            val = int(float(vol["size"]))
4575
          elif field == "instance":
4576
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4577
          else:
4578
            raise errors.ParameterError(field)
4579
          node_output.append(str(val))
4580

    
4581
        output.append(node_output)
4582

    
4583
    return output
4584

    
4585

    
4586
class LUNodeQueryStorage(NoHooksLU):
4587
  """Logical unit for getting information on storage units on node(s).
4588

4589
  """
4590
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4591
  REQ_BGL = False
4592

    
4593
  def CheckArguments(self):
4594
    _CheckOutputFields(static=self._FIELDS_STATIC,
4595
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4596
                       selected=self.op.output_fields)
4597

    
4598
  def ExpandNames(self):
4599
    self.needed_locks = {}
4600
    self.share_locks[locking.LEVEL_NODE] = 1
4601

    
4602
    if self.op.nodes:
4603
      self.needed_locks[locking.LEVEL_NODE] = \
4604
        _GetWantedNodes(self, self.op.nodes)
4605
    else:
4606
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4607

    
4608
  def Exec(self, feedback_fn):
4609
    """Computes the list of nodes and their attributes.
4610

4611
    """
4612
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4613

    
4614
    # Always get name to sort by
4615
    if constants.SF_NAME in self.op.output_fields:
4616
      fields = self.op.output_fields[:]
4617
    else:
4618
      fields = [constants.SF_NAME] + self.op.output_fields
4619

    
4620
    # Never ask for node or type as it's only known to the LU
4621
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4622
      while extra in fields:
4623
        fields.remove(extra)
4624

    
4625
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4626
    name_idx = field_idx[constants.SF_NAME]
4627

    
4628
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4629
    data = self.rpc.call_storage_list(self.nodes,
4630
                                      self.op.storage_type, st_args,
4631
                                      self.op.name, fields)
4632

    
4633
    result = []
4634

    
4635
    for node in utils.NiceSort(self.nodes):
4636
      nresult = data[node]
4637
      if nresult.offline:
4638
        continue
4639

    
4640
      msg = nresult.fail_msg
4641
      if msg:
4642
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4643
        continue
4644

    
4645
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4646

    
4647
      for name in utils.NiceSort(rows.keys()):
4648
        row = rows[name]
4649

    
4650
        out = []
4651

    
4652
        for field in self.op.output_fields:
4653
          if field == constants.SF_NODE:
4654
            val = node
4655
          elif field == constants.SF_TYPE:
4656
            val = self.op.storage_type
4657
          elif field in field_idx:
4658
            val = row[field_idx[field]]
4659
          else:
4660
            raise errors.ParameterError(field)
4661

    
4662
          out.append(val)
4663

    
4664
        result.append(out)
4665

    
4666
    return result
4667

    
4668

    
4669
class _InstanceQuery(_QueryBase):
4670
  FIELDS = query.INSTANCE_FIELDS
4671

    
4672
  def ExpandNames(self, lu):
4673
    lu.needed_locks = {}
4674
    lu.share_locks = _ShareAll()
4675

    
4676
    if self.names:
4677
      self.wanted = _GetWantedInstances(lu, self.names)
4678
    else:
4679
      self.wanted = locking.ALL_SET
4680

    
4681
    self.do_locking = (self.use_locking and
4682
                       query.IQ_LIVE in self.requested_data)
4683
    if self.do_locking:
4684
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4685
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4686
      lu.needed_locks[locking.LEVEL_NODE] = []
4687
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4688

    
4689
    self.do_grouplocks = (self.do_locking and
4690
                          query.IQ_NODES in self.requested_data)
4691

    
4692
  def DeclareLocks(self, lu, level):
4693
    if self.do_locking:
4694
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4695
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4696

    
4697
        # Lock all groups used by instances optimistically; this requires going
4698
        # via the node before it's locked, requiring verification later on
4699
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4700
          set(group_uuid
4701
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4702
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4703
      elif level == locking.LEVEL_NODE:
4704
        lu._LockInstancesNodes() # pylint: disable=W0212
4705

    
4706
  @staticmethod
4707
  def _CheckGroupLocks(lu):
4708
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4709
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4710

    
4711
    # Check if node groups for locked instances are still correct
4712
    for instance_name in owned_instances:
4713
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4714

    
4715
  def _GetQueryData(self, lu):
4716
    """Computes the list of instances and their attributes.
4717

4718
    """
4719
    if self.do_grouplocks:
4720
      self._CheckGroupLocks(lu)
4721

    
4722
    cluster = lu.cfg.GetClusterInfo()
4723
    all_info = lu.cfg.GetAllInstancesInfo()
4724

    
4725
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4726

    
4727
    instance_list = [all_info[name] for name in instance_names]
4728
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4729
                                        for inst in instance_list)))
4730
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4731
    bad_nodes = []
4732
    offline_nodes = []
4733
    wrongnode_inst = set()
4734

    
4735
    # Gather data as requested
4736
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4737
      live_data = {}
4738
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4739
      for name in nodes:
4740
        result = node_data[name]
4741
        if result.offline:
4742
          # offline nodes will be in both lists
4743
          assert result.fail_msg
4744
          offline_nodes.append(name)
4745
        if result.fail_msg:
4746
          bad_nodes.append(name)
4747
        elif result.payload:
4748
          for inst in result.payload:
4749
            if inst in all_info:
4750
              if all_info[inst].primary_node == name:
4751
                live_data.update(result.payload)
4752
              else:
4753
                wrongnode_inst.add(inst)
4754
            else:
4755
              # orphan instance; we don't list it here as we don't
4756
              # handle this case yet in the output of instance listing
4757
              logging.warning("Orphan instance '%s' found on node %s",
4758
                              inst, name)
4759
        # else no instance is alive
4760
    else:
4761
      live_data = {}
4762

    
4763
    if query.IQ_DISKUSAGE in self.requested_data:
4764
      disk_usage = dict((inst.name,
4765
                         _ComputeDiskSize(inst.disk_template,
4766
                                          [{constants.IDISK_SIZE: disk.size}
4767
                                           for disk in inst.disks]))
4768
                        for inst in instance_list)
4769
    else:
4770
      disk_usage = None
4771

    
4772
    if query.IQ_CONSOLE in self.requested_data:
4773
      consinfo = {}
4774
      for inst in instance_list:
4775
        if inst.name in live_data:
4776
          # Instance is running
4777
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4778
        else:
4779
          consinfo[inst.name] = None
4780
      assert set(consinfo.keys()) == set(instance_names)
4781
    else:
4782
      consinfo = None
4783

    
4784
    if query.IQ_NODES in self.requested_data:
4785
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4786
                                            instance_list)))
4787
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4788
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4789
                    for uuid in set(map(operator.attrgetter("group"),
4790
                                        nodes.values())))
4791
    else:
4792
      nodes = None
4793
      groups = None
4794

    
4795
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4796
                                   disk_usage, offline_nodes, bad_nodes,
4797
                                   live_data, wrongnode_inst, consinfo,
4798
                                   nodes, groups)
4799

    
4800

    
4801
class LUQuery(NoHooksLU):
4802
  """Query for resources/items of a certain kind.
4803

4804
  """
4805
  # pylint: disable=W0142
4806
  REQ_BGL = False
4807

    
4808
  def CheckArguments(self):
4809
    qcls = _GetQueryImplementation(self.op.what)
4810

    
4811
    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4812

    
4813
  def ExpandNames(self):
4814
    self.impl.ExpandNames(self)
4815

    
4816
  def DeclareLocks(self, level):
4817
    self.impl.DeclareLocks(self, level)
4818

    
4819
  def Exec(self, feedback_fn):
4820
    return self.impl.NewStyleQuery(self)
4821

    
4822

    
4823
class LUQueryFields(NoHooksLU):
4824
  """Query for resources/items of a certain kind.
4825

4826
  """
4827
  # pylint: disable=W0142
4828
  REQ_BGL = False
4829

    
4830
  def CheckArguments(self):
4831
    self.qcls = _GetQueryImplementation(self.op.what)
4832

    
4833
  def ExpandNames(self):
4834
    self.needed_locks = {}
4835

    
4836
  def Exec(self, feedback_fn):
4837
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4838

    
4839

    
4840
class LUNodeModifyStorage(NoHooksLU):
4841
  """Logical unit for modifying a storage volume on a node.
4842

4843
  """
4844
  REQ_BGL = False
4845

    
4846
  def CheckArguments(self):
4847
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4848

    
4849
    storage_type = self.op.storage_type
4850

    
4851
    try:
4852
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4853
    except KeyError:
4854
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4855
                                 " modified" % storage_type,
4856
                                 errors.ECODE_INVAL)
4857

    
4858
    diff = set(self.op.changes.keys()) - modifiable
4859
    if diff:
4860
      raise errors.OpPrereqError("The following fields can not be modified for"
4861
                                 " storage units of type '%s': %r" %
4862
                                 (storage_type, list(diff)),
4863
                                 errors.ECODE_INVAL)
4864

    
4865
  def ExpandNames(self):
4866
    self.needed_locks = {
4867
      locking.LEVEL_NODE: self.op.node_name,
4868
      }
4869

    
4870
  def Exec(self, feedback_fn):
4871
    """Computes the list of nodes and their attributes.
4872

4873
    """
4874
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4875
    result = self.rpc.call_storage_modify(self.op.node_name,
4876
                                          self.op.storage_type, st_args,
4877
                                          self.op.name, self.op.changes)
4878
    result.Raise("Failed to modify storage unit '%s' on %s" %
4879
                 (self.op.name, self.op.node_name))
4880

    
4881

    
4882
class LUNodeAdd(LogicalUnit):
4883
  """Logical unit for adding node to the cluster.
4884

4885
  """
4886
  HPATH = "node-add"
4887
  HTYPE = constants.HTYPE_NODE
4888
  _NFLAGS = ["master_capable", "vm_capable"]
4889

    
4890
  def CheckArguments(self):
4891
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4892
    # validate/normalize the node name
4893
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4894
                                         family=self.primary_ip_family)
4895
    self.op.node_name = self.hostname.name
4896

    
4897
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4898
      raise errors.OpPrereqError("Cannot readd the master node",
4899
                                 errors.ECODE_STATE)
4900

    
4901
    if self.op.readd and self.op.group:
4902
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4903
                                 " being readded", errors.ECODE_INVAL)
4904

    
4905
  def BuildHooksEnv(self):
4906
    """Build hooks env.
4907

4908
    This will run on all nodes before, and on all nodes + the new node after.
4909

4910
    """
4911
    return {
4912
      "OP_TARGET": self.op.node_name,
4913
      "NODE_NAME": self.op.node_name,
4914
      "NODE_PIP": self.op.primary_ip,
4915
      "NODE_SIP": self.op.secondary_ip,
4916
      "MASTER_CAPABLE": str(self.op.master_capable),
4917
      "VM_CAPABLE": str(self.op.vm_capable),
4918
      }
4919

    
4920
  def BuildHooksNodes(self):
4921
    """Build hooks nodes.
4922

4923
    """
4924
    # Exclude added node
4925
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4926
    post_nodes = pre_nodes + [self.op.node_name, ]
4927

    
4928
    return (pre_nodes, post_nodes)
4929

    
4930
  def CheckPrereq(self):
4931
    """Check prerequisites.
4932

4933
    This checks:
4934
     - the new node is not already in the config
4935
     - it is resolvable
4936
     - its parameters (single/dual homed) matches the cluster
4937

4938
    Any errors are signaled by raising errors.OpPrereqError.
4939

4940
    """
4941
    cfg = self.cfg
4942
    hostname = self.hostname
4943
    node = hostname.name
4944
    primary_ip = self.op.primary_ip = hostname.ip
4945
    if self.op.secondary_ip is None:
4946
      if self.primary_ip_family == netutils.IP6Address.family:
4947
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4948
                                   " IPv4 address must be given as secondary",
4949
                                   errors.ECODE_INVAL)
4950
      self.op.secondary_ip = primary_ip
4951

    
4952
    secondary_ip = self.op.secondary_ip
4953
    if not netutils.IP4Address.IsValid(secondary_ip):
4954
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4955
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4956

    
4957
    node_list = cfg.GetNodeList()
4958
    if not self.op.readd and node in node_list:
4959
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4960
                                 node, errors.ECODE_EXISTS)
4961
    elif self.op.readd and node not in node_list:
4962
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4963
                                 errors.ECODE_NOENT)
4964

    
4965
    self.changed_primary_ip = False
4966

    
4967
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4968
      if self.op.readd and node == existing_node_name:
4969
        if existing_node.secondary_ip != secondary_ip:
4970
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4971
                                     " address configuration as before",
4972
                                     errors.ECODE_INVAL)
4973
        if existing_node.primary_ip != primary_ip:
4974
          self.changed_primary_ip = True
4975

    
4976
        continue
4977

    
4978
      if (existing_node.primary_ip == primary_ip or
4979
          existing_node.secondary_ip == primary_ip or
4980
          existing_node.primary_ip == secondary_ip or
4981
          existing_node.secondary_ip == secondary_ip):
4982
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4983
                                   " existing node %s" % existing_node.name,
4984
                                   errors.ECODE_NOTUNIQUE)
4985

    
4986
    # After this 'if' block, None is no longer a valid value for the
4987
    # _capable op attributes
4988
    if self.op.readd:
4989
      old_node = self.cfg.GetNodeInfo(node)
4990
      assert old_node is not None, "Can't retrieve locked node %s" % node
4991
      for attr in self._NFLAGS:
4992
        if getattr(self.op, attr) is None:
4993
          setattr(self.op, attr, getattr(old_node, attr))
4994
    else:
4995
      for attr in self._NFLAGS:
4996
        if getattr(self.op, attr) is None:
4997
          setattr(self.op, attr, True)
4998

    
4999
    if self.op.readd and not self.op.vm_capable:
5000
      pri, sec = cfg.GetNodeInstances(node)
5001
      if pri or sec:
5002
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5003
                                   " flag set to false, but it already holds"
5004
                                   " instances" % node,
5005
                                   errors.ECODE_STATE)
5006

    
5007
    # check that the type of the node (single versus dual homed) is the
5008
    # same as for the master
5009
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5010
    master_singlehomed = myself.secondary_ip == myself.primary_ip
5011
    newbie_singlehomed = secondary_ip == primary_ip
5012
    if master_singlehomed != newbie_singlehomed:
5013
      if master_singlehomed:
5014
        raise errors.OpPrereqError("The master has no secondary ip but the"
5015
                                   " new node has one",
5016
                                   errors.ECODE_INVAL)
5017
      else:
5018
        raise errors.OpPrereqError("The master has a secondary ip but the"
5019
                                   " new node doesn't have one",
5020
                                   errors.ECODE_INVAL)
5021

    
5022
    # checks reachability
5023
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5024
      raise errors.OpPrereqError("Node not reachable by ping",
5025
                                 errors.ECODE_ENVIRON)
5026

    
5027
    if not newbie_singlehomed:
5028
      # check reachability from my secondary ip to newbie's secondary ip
5029
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5030
                           source=myself.secondary_ip):
5031
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5032
                                   " based ping to node daemon port",
5033
                                   errors.ECODE_ENVIRON)
5034

    
5035
    if self.op.readd:
5036
      exceptions = [node]
5037
    else:
5038
      exceptions = []
5039

    
5040
    if self.op.master_capable:
5041
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5042
    else:
5043
      self.master_candidate = False
5044

    
5045
    if self.op.readd:
5046
      self.new_node = old_node
5047
    else:
5048
      node_group = cfg.LookupNodeGroup(self.op.group)
5049
      self.new_node = objects.Node(name=node,
5050
                                   primary_ip=primary_ip,
5051
                                   secondary_ip=secondary_ip,
5052
                                   master_candidate=self.master_candidate,
5053
                                   offline=False, drained=False,
5054
                                   group=node_group)
5055

    
5056
    if self.op.ndparams:
5057
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5058

    
5059
  def Exec(self, feedback_fn):
5060
    """Adds the new node to the cluster.
5061

5062
    """
5063
    new_node = self.new_node
5064
    node = new_node.name
5065

    
5066
    # We adding a new node so we assume it's powered
5067
    new_node.powered = True
5068

    
5069
    # for re-adds, reset the offline/drained/master-candidate flags;
5070
    # we need to reset here, otherwise offline would prevent RPC calls
5071
    # later in the procedure; this also means that if the re-add
5072
    # fails, we are left with a non-offlined, broken node
5073
    if self.op.readd:
5074
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5075
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5076
      # if we demote the node, we do cleanup later in the procedure
5077
      new_node.master_candidate = self.master_candidate
5078
      if self.changed_primary_ip:
5079
        new_node.primary_ip = self.op.primary_ip
5080

    
5081
    # copy the master/vm_capable flags
5082
    for attr in self._NFLAGS:
5083
      setattr(new_node, attr, getattr(self.op, attr))
5084

    
5085
    # notify the user about any possible mc promotion
5086
    if new_node.master_candidate:
5087
      self.LogInfo("Node will be a master candidate")
5088

    
5089
    if self.op.ndparams:
5090
      new_node.ndparams = self.op.ndparams
5091
    else:
5092
      new_node.ndparams = {}
5093

    
5094
    # check connectivity
5095
    result = self.rpc.call_version([node])[node]
5096
    result.Raise("Can't get version information from node %s" % node)
5097
    if constants.PROTOCOL_VERSION == result.payload:
5098
      logging.info("Communication to node %s fine, sw version %s match",
5099
                   node, result.payload)
5100
    else:
5101
      raise errors.OpExecError("Version mismatch master version %s,"
5102
                               " node version %s" %
5103
                               (constants.PROTOCOL_VERSION, result.payload))
5104

    
5105
    # Add node to our /etc/hosts, and add key to known_hosts
5106
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5107
      master_node = self.cfg.GetMasterNode()
5108
      result = self.rpc.call_etc_hosts_modify(master_node,
5109
                                              constants.ETC_HOSTS_ADD,
5110
                                              self.hostname.name,
5111
                                              self.hostname.ip)
5112
      result.Raise("Can't update hosts file with new host data")
5113

    
5114
    if new_node.secondary_ip != new_node.primary_ip:
5115
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5116
                               False)
5117

    
5118
    node_verify_list = [self.cfg.GetMasterNode()]
5119
    node_verify_param = {
5120
      constants.NV_NODELIST: ([node], {}),
5121
      # TODO: do a node-net-test as well?
5122
    }
5123

    
5124
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5125
                                       self.cfg.GetClusterName())
5126
    for verifier in node_verify_list:
5127
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5128
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5129
      if nl_payload:
5130
        for failed in nl_payload:
5131
          feedback_fn("ssh/hostname verification failed"
5132
                      " (checking from %s): %s" %
5133
                      (verifier, nl_payload[failed]))
5134
        raise errors.OpExecError("ssh/hostname verification failed")
5135

    
5136
    if self.op.readd:
5137
      _RedistributeAncillaryFiles(self)
5138
      self.context.ReaddNode(new_node)
5139
      # make sure we redistribute the config
5140
      self.cfg.Update(new_node, feedback_fn)
5141
      # and make sure the new node will not have old files around
5142
      if not new_node.master_candidate:
5143
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5144
        msg = result.fail_msg
5145
        if msg:
5146
          self.LogWarning("Node failed to demote itself from master"
5147
                          " candidate status: %s" % msg)
5148
    else:
5149
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5150
                                  additional_vm=self.op.vm_capable)
5151
      self.context.AddNode(new_node, self.proc.GetECId())
5152

    
5153

    
5154
class LUNodeSetParams(LogicalUnit):
5155
  """Modifies the parameters of a node.
5156

5157
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5158
      to the node role (as _ROLE_*)
5159
  @cvar _R2F: a dictionary from node role to tuples of flags
5160
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5161

5162
  """
5163
  HPATH = "node-modify"
5164
  HTYPE = constants.HTYPE_NODE
5165
  REQ_BGL = False
5166
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5167
  _F2R = {
5168
    (True, False, False): _ROLE_CANDIDATE,
5169
    (False, True, False): _ROLE_DRAINED,
5170
    (False, False, True): _ROLE_OFFLINE,
5171
    (False, False, False): _ROLE_REGULAR,
5172
    }
5173
  _R2F = dict((v, k) for k, v in _F2R.items())
5174
  _FLAGS = ["master_candidate", "drained", "offline"]
5175

    
5176
  def CheckArguments(self):
5177
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5178
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5179
                self.op.master_capable, self.op.vm_capable,
5180
                self.op.secondary_ip, self.op.ndparams]
5181
    if all_mods.count(None) == len(all_mods):
5182
      raise errors.OpPrereqError("Please pass at least one modification",
5183
                                 errors.ECODE_INVAL)
5184
    if all_mods.count(True) > 1:
5185
      raise errors.OpPrereqError("Can't set the node into more than one"
5186
                                 " state at the same time",
5187
                                 errors.ECODE_INVAL)
5188

    
5189
    # Boolean value that tells us whether we might be demoting from MC
5190
    self.might_demote = (self.op.master_candidate == False or
5191
                         self.op.offline == True or
5192
                         self.op.drained == True or
5193
                         self.op.master_capable == False)
5194

    
5195
    if self.op.secondary_ip:
5196
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5197
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5198
                                   " address" % self.op.secondary_ip,
5199
                                   errors.ECODE_INVAL)
5200

    
5201
    self.lock_all = self.op.auto_promote and self.might_demote
5202
    self.lock_instances = self.op.secondary_ip is not None
5203

    
5204
  def ExpandNames(self):
5205
    if self.lock_all:
5206
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5207
    else:
5208
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5209

    
5210
    if self.lock_instances:
5211
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5212

    
5213
  def DeclareLocks(self, level):
5214
    # If we have locked all instances, before waiting to lock nodes, release
5215
    # all the ones living on nodes unrelated to the current operation.
5216
    if level == locking.LEVEL_NODE and self.lock_instances:
5217
      self.affected_instances = []
5218
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5219
        instances_keep = []
5220

    
5221
        # Build list of instances to release
5222
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5223
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5224
          if (instance.disk_template in constants.DTS_INT_MIRROR and
5225
              self.op.node_name in instance.all_nodes):
5226
            instances_keep.append(instance_name)
5227
            self.affected_instances.append(instance)
5228

    
5229
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5230

    
5231
        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5232
                set(instances_keep))
5233

    
5234
  def BuildHooksEnv(self):
5235
    """Build hooks env.
5236

5237
    This runs on the master node.
5238

5239
    """
5240
    return {
5241
      "OP_TARGET": self.op.node_name,
5242
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5243
      "OFFLINE": str(self.op.offline),
5244
      "DRAINED": str(self.op.drained),
5245
      "MASTER_CAPABLE": str(self.op.master_capable),
5246
      "VM_CAPABLE": str(self.op.vm_capable),
5247
      }
5248

    
5249
  def BuildHooksNodes(self):
5250
    """Build hooks nodes.
5251

5252
    """
5253
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5254
    return (nl, nl)
5255

    
5256
  def CheckPrereq(self):
5257
    """Check prerequisites.
5258

5259
    This only checks the instance list against the existing names.
5260

5261
    """
5262
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5263

    
5264
    if (self.op.master_candidate is not None or
5265
        self.op.drained is not None or
5266
        self.op.offline is not None):
5267
      # we can't change the master's node flags
5268
      if self.op.node_name == self.cfg.GetMasterNode():
5269
        raise errors.OpPrereqError("The master role can be changed"
5270
                                   " only via master-failover",
5271
                                   errors.ECODE_INVAL)
5272

    
5273
    if self.op.master_candidate and not node.master_capable:
5274
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5275
                                 " it a master candidate" % node.name,
5276
                                 errors.ECODE_STATE)
5277

    
5278
    if self.op.vm_capable == False:
5279
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5280
      if ipri or isec:
5281
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5282
                                   " the vm_capable flag" % node.name,
5283
                                   errors.ECODE_STATE)
5284

    
5285
    if node.master_candidate and self.might_demote and not self.lock_all:
5286
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5287
      # check if after removing the current node, we're missing master
5288
      # candidates
5289
      (mc_remaining, mc_should, _) = \
5290
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5291
      if mc_remaining < mc_should:
5292
        raise errors.OpPrereqError("Not enough master candidates, please"
5293
                                   " pass auto promote option to allow"
5294
                                   " promotion", errors.ECODE_STATE)
5295

    
5296
    self.old_flags = old_flags = (node.master_candidate,
5297
                                  node.drained, node.offline)
5298
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5299
    self.old_role = old_role = self._F2R[old_flags]
5300

    
5301
    # Check for ineffective changes
5302
    for attr in self._FLAGS:
5303
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5304
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5305
        setattr(self.op, attr, None)
5306

    
5307
    # Past this point, any flag change to False means a transition
5308
    # away from the respective state, as only real changes are kept
5309

    
5310
    # TODO: We might query the real power state if it supports OOB
5311
    if _SupportsOob(self.cfg, node):
5312
      if self.op.offline is False and not (node.powered or
5313
                                           self.op.powered == True):
5314
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5315
                                    " offline status can be reset") %
5316
                                   self.op.node_name)
5317
    elif self.op.powered is not None:
5318
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5319
                                  " as it does not support out-of-band"
5320
                                  " handling") % self.op.node_name)
5321

    
5322
    # If we're being deofflined/drained, we'll MC ourself if needed
5323
    if (self.op.drained == False or self.op.offline == False or
5324
        (self.op.master_capable and not node.master_capable)):
5325
      if _DecideSelfPromotion(self):
5326
        self.op.master_candidate = True
5327
        self.LogInfo("Auto-promoting node to master candidate")
5328

    
5329
    # If we're no longer master capable, we'll demote ourselves from MC
5330
    if self.op.master_capable == False and node.master_candidate:
5331
      self.LogInfo("Demoting from master candidate")
5332
      self.op.master_candidate = False
5333

    
5334
    # Compute new role
5335
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5336
    if self.op.master_candidate:
5337
      new_role = self._ROLE_CANDIDATE
5338
    elif self.op.drained:
5339
      new_role = self._ROLE_DRAINED
5340
    elif self.op.offline:
5341
      new_role = self._ROLE_OFFLINE
5342
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5343
      # False is still in new flags, which means we're un-setting (the
5344
      # only) True flag
5345
      new_role = self._ROLE_REGULAR
5346
    else: # no new flags, nothing, keep old role
5347
      new_role = old_role
5348

    
5349
    self.new_role = new_role
5350

    
5351
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5352
      # Trying to transition out of offline status
5353
      result = self.rpc.call_version([node.name])[node.name]
5354
      if result.fail_msg:
5355
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5356
                                   " to report its version: %s" %
5357
                                   (node.name, result.fail_msg),
5358
                                   errors.ECODE_STATE)
5359
      else:
5360
        self.LogWarning("Transitioning node from offline to online state"
5361
                        " without using re-add. Please make sure the node"
5362
                        " is healthy!")
5363

    
5364
    if self.op.secondary_ip:
5365
      # Ok even without locking, because this can't be changed by any LU
5366
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5367
      master_singlehomed = master.secondary_ip == master.primary_ip
5368
      if master_singlehomed and self.op.secondary_ip:
5369
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5370
                                   " homed cluster", errors.ECODE_INVAL)
5371

    
5372
      if node.offline:
5373
        if self.affected_instances:
5374
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5375
                                     " node has instances (%s) configured"
5376
                                     " to use it" % self.affected_instances)
5377
      else:
5378
        # On online nodes, check that no instances are running, and that
5379
        # the node has the new ip and we can reach it.
5380
        for instance in self.affected_instances:
5381
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5382

    
5383
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5384
        if master.name != node.name:
5385
          # check reachability from master secondary ip to new secondary ip
5386
          if not netutils.TcpPing(self.op.secondary_ip,
5387
                                  constants.DEFAULT_NODED_PORT,
5388
                                  source=master.secondary_ip):
5389
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5390
                                       " based ping to node daemon port",
5391
                                       errors.ECODE_ENVIRON)
5392

    
5393
    if self.op.ndparams:
5394
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5395
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5396
      self.new_ndparams = new_ndparams
5397

    
5398
  def Exec(self, feedback_fn):
5399
    """Modifies a node.
5400

5401
    """
5402
    node = self.node
5403
    old_role = self.old_role
5404
    new_role = self.new_role
5405

    
5406
    result = []
5407

    
5408
    if self.op.ndparams:
5409
      node.ndparams = self.new_ndparams
5410

    
5411
    if self.op.powered is not None:
5412
      node.powered = self.op.powered
5413

    
5414
    for attr in ["master_capable", "vm_capable"]:
5415
      val = getattr(self.op, attr)
5416
      if val is not None:
5417
        setattr(node, attr, val)
5418
        result.append((attr, str(val)))
5419

    
5420
    if new_role != old_role:
5421
      # Tell the node to demote itself, if no longer MC and not offline
5422
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5423
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5424
        if msg:
5425
          self.LogWarning("Node failed to demote itself: %s", msg)
5426

    
5427
      new_flags = self._R2F[new_role]
5428
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5429
        if of != nf:
5430
          result.append((desc, str(nf)))
5431
      (node.master_candidate, node.drained, node.offline) = new_flags
5432

    
5433
      # we locked all nodes, we adjust the CP before updating this node
5434
      if self.lock_all:
5435
        _AdjustCandidatePool(self, [node.name])
5436

    
5437
    if self.op.secondary_ip:
5438
      node.secondary_ip = self.op.secondary_ip
5439
      result.append(("secondary_ip", self.op.secondary_ip))
5440

    
5441
    # this will trigger configuration file update, if needed
5442
    self.cfg.Update(node, feedback_fn)
5443

    
5444
    # this will trigger job queue propagation or cleanup if the mc
5445
    # flag changed
5446
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5447
      self.context.ReaddNode(node)
5448

    
5449
    return result
5450

    
5451

    
5452
class LUNodePowercycle(NoHooksLU):
5453
  """Powercycles a node.
5454

5455
  """
5456
  REQ_BGL = False
5457

    
5458
  def CheckArguments(self):
5459
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5460
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5461
      raise errors.OpPrereqError("The node is the master and the force"
5462
                                 " parameter was not set",
5463
                                 errors.ECODE_INVAL)
5464

    
5465
  def ExpandNames(self):
5466
    """Locking for PowercycleNode.
5467

5468
    This is a last-resort option and shouldn't block on other
5469
    jobs. Therefore, we grab no locks.
5470

5471
    """
5472
    self.needed_locks = {}
5473

    
5474
  def Exec(self, feedback_fn):
5475
    """Reboots a node.
5476

5477
    """
5478
    result = self.rpc.call_node_powercycle(self.op.node_name,
5479
                                           self.cfg.GetHypervisorType())
5480
    result.Raise("Failed to schedule the reboot")
5481
    return result.payload
5482

    
5483

    
5484
class LUClusterQuery(NoHooksLU):
5485
  """Query cluster configuration.
5486

5487
  """
5488
  REQ_BGL = False
5489

    
5490
  def ExpandNames(self):
5491
    self.needed_locks = {}
5492

    
5493
  def Exec(self, feedback_fn):
5494
    """Return cluster config.
5495

5496
    """
5497
    cluster = self.cfg.GetClusterInfo()
5498
    os_hvp = {}
5499

    
5500
    # Filter just for enabled hypervisors
5501
    for os_name, hv_dict in cluster.os_hvp.items():
5502
      os_hvp[os_name] = {}
5503
      for hv_name, hv_params in hv_dict.items():
5504
        if hv_name in cluster.enabled_hypervisors:
5505
          os_hvp[os_name][hv_name] = hv_params
5506

    
5507
    # Convert ip_family to ip_version
5508
    primary_ip_version = constants.IP4_VERSION
5509
    if cluster.primary_ip_family == netutils.IP6Address.family:
5510
      primary_ip_version = constants.IP6_VERSION
5511

    
5512
    result = {
5513
      "software_version": constants.RELEASE_VERSION,
5514
      "protocol_version": constants.PROTOCOL_VERSION,
5515
      "config_version": constants.CONFIG_VERSION,
5516
      "os_api_version": max(constants.OS_API_VERSIONS),
5517
      "export_version": constants.EXPORT_VERSION,
5518
      "architecture": (platform.architecture()[0], platform.machine()),
5519
      "name": cluster.cluster_name,
5520
      "master": cluster.master_node,
5521
      "default_hypervisor": cluster.enabled_hypervisors[0],
5522
      "enabled_hypervisors": cluster.enabled_hypervisors,
5523
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5524
                        for hypervisor_name in cluster.enabled_hypervisors]),
5525
      "os_hvp": os_hvp,
5526
      "beparams": cluster.beparams,
5527
      "osparams": cluster.osparams,
5528
      "nicparams": cluster.nicparams,
5529
      "ndparams": cluster.ndparams,
5530
      "candidate_pool_size": cluster.candidate_pool_size,
5531
      "master_netdev": cluster.master_netdev,
5532
      "volume_group_name": cluster.volume_group_name,
5533
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5534
      "file_storage_dir": cluster.file_storage_dir,
5535
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5536
      "maintain_node_health": cluster.maintain_node_health,
5537
      "ctime": cluster.ctime,
5538
      "mtime": cluster.mtime,
5539
      "uuid": cluster.uuid,
5540
      "tags": list(cluster.GetTags()),
5541
      "uid_pool": cluster.uid_pool,
5542
      "default_iallocator": cluster.default_iallocator,
5543
      "reserved_lvs": cluster.reserved_lvs,
5544
      "primary_ip_version": primary_ip_version,
5545
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5546
      "hidden_os": cluster.hidden_os,
5547
      "blacklisted_os": cluster.blacklisted_os,
5548
      }
5549

    
5550
    return result
5551

    
5552

    
5553
class LUClusterConfigQuery(NoHooksLU):
5554
  """Return configuration values.
5555

5556
  """
5557
  REQ_BGL = False
5558
  _FIELDS_DYNAMIC = utils.FieldSet()
5559
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5560
                                  "watcher_pause", "volume_group_name")
5561

    
5562
  def CheckArguments(self):
5563
    _CheckOutputFields(static=self._FIELDS_STATIC,
5564
                       dynamic=self._FIELDS_DYNAMIC,
5565
                       selected=self.op.output_fields)
5566

    
5567
  def ExpandNames(self):
5568
    self.needed_locks = {}
5569

    
5570
  def Exec(self, feedback_fn):
5571
    """Dump a representation of the cluster config to the standard output.
5572

5573
    """
5574
    values = []
5575
    for field in self.op.output_fields:
5576
      if field == "cluster_name":
5577
        entry = self.cfg.GetClusterName()
5578
      elif field == "master_node":
5579
        entry = self.cfg.GetMasterNode()
5580
      elif field == "drain_flag":
5581
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5582
      elif field == "watcher_pause":
5583
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5584
      elif field == "volume_group_name":
5585
        entry = self.cfg.GetVGName()
5586
      else:
5587
        raise errors.ParameterError(field)
5588
      values.append(entry)
5589
    return values
5590

    
5591

    
5592
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


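# Usage note: callers of _AssembleInstanceDisks unpack the (disks_ok,
# device_info) pair and treat a False status as fatal.  A minimal sketch,
# mirroring LUInstanceActivateDisks.Exec above:
#
#   disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance,
#                                                 ignore_size=self.op.ignore_size)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")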
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    # "force" may be None when the caller does not expose a force option
    # (e.g. LUInstanceReinstall.Exec passes None); the hint below is only
    # shown for an explicit False
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


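# Note: the force path in LUInstanceDeactivateDisks.Exec above calls
# _ShutdownInstanceDisks directly, skipping the _CheckInstanceDown
# verification that _SafeShutdownInstanceDisks below performs first.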
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored
  and do not cause the function to return False.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


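# Usage note: a minimal sketch of how a logical unit invokes
# _CheckNodeFreeMemory from CheckPrereq, mirroring LUInstanceStartup below
# (bep being the instance's filled beparams dict):
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)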
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams,
                                            self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # normalise the disk list
    self.op.disks = sorted(frozenset(self.op.disks))

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    to_skip = []
    mods = [] # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes and minors
          assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6 # otherwise disk internals
                                           # have changed
          (_, _, old_port, _, _, old_secret) = disk.logical_id
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                    new_minors[0], new_minors[1], old_secret)
          assert len(disk.logical_id) == len(new_id)
          mods.append((idx, new_id))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id in mods:
      instance.disks[idx].logical_id = new_id

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname.name != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (hostname.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


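# Note: LUNodeMigrate below does not perform the migrations itself; its Exec
# method builds one single-opcode OpInstanceMigrate job per primary instance
# of the node and returns them wrapped in ResultWithJobs, leaving execution
# to the separately submitted jobs.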
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare jobs for migration instances
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
      ]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)


class TLMigrateInstance(Tasklet):
7052
  """Tasklet class for instance migration.
7053

7054
  @type live: boolean
7055
  @ivar live: whether the migration will be done live or non-live;
7056
      this variable is initalized only after CheckPrereq has run
7057
  @type cleanup: boolean
7058
  @ivar cleanup: Wheater we cleanup from a failed migration
7059
  @type iallocator: string
7060
  @ivar iallocator: The iallocator used to determine target_node
7061
  @type target_node: string
7062
  @ivar target_node: If given, the target_node to reallocate the instance to
7063
  @type failover: boolean
7064
  @ivar failover: Whether operation results in failover or migration
7065
  @type fallback: boolean
7066
  @ivar fallback: Whether fallback to failover is allowed if migration not
7067
                  possible
7068
  @type ignore_consistency: boolean
7069
  @ivar ignore_consistency: Wheter we should ignore consistency between source
7070
                            and target node
7071
  @type shutdown_timeout: int
7072
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
7073

7074
  """
7075
  def __init__(self, lu, instance_name, cleanup=False,
7076
               failover=False, fallback=False,
7077
               ignore_consistency=False,
7078
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7079
    """Initializes this class.
7080

7081
    """
7082
    Tasklet.__init__(self, lu)
7083

    
7084
    # Parameters
7085
    self.instance_name = instance_name
7086
    self.cleanup = cleanup
7087
    self.live = False # will be overridden later
7088
    self.failover = failover
7089
    self.fallback = fallback
7090
    self.ignore_consistency = ignore_consistency
7091
    self.shutdown_timeout = shutdown_timeout
7092

    
7093
  def CheckPrereq(self):
7094
    """Check prerequisites.
7095

7096
    This checks that the instance is in the cluster.
7097

7098
    """
7099
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7100
    instance = self.cfg.GetInstanceInfo(instance_name)
7101
    assert instance is not None
7102
    self.instance = instance
7103

    
7104
    if (not self.cleanup and not instance.admin_up and not self.failover and
7105
        self.fallback):
7106
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7107
                      " to failover")
7108
      self.failover = True
7109

    
7110
    if instance.disk_template not in constants.DTS_MIRRORED:
7111
      if self.failover:
7112
        text = "failovers"
7113
      else:
7114
        text = "migrations"
7115
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7116
                                 " %s" % (instance.disk_template, text),
7117
                                 errors.ECODE_STATE)
7118

    
7119
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7120
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7121

    
7122
      if self.lu.op.iallocator:
7123
        self._RunAllocator()
7124
      else:
7125
        # We set set self.target_node as it is required by
7126
        # BuildHooksEnv
7127
        self.target_node = self.lu.op.target_node
7128

    
7129
      # self.target_node is already populated, either directly or by the
7130
      # iallocator run
7131
      target_node = self.target_node
7132
      if self.target_node == instance.primary_node:
7133
        raise errors.OpPrereqError("Cannot migrate instance %s"
7134
                                   " to its primary (%s)" %
7135
                                   (instance.name, instance.primary_node))
7136

    
7137
      if len(self.lu.tasklets) == 1:
7138
        # It is safe to release locks only when we're the only tasklet
7139
        # in the LU
7140
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7141
                      keep=[instance.primary_node, self.target_node])
7142

    
7143
    else:
7144
      secondary_nodes = instance.secondary_nodes
7145
      if not secondary_nodes:
7146
        raise errors.ConfigurationError("No secondary node but using"
7147
                                        " %s disk template" %
7148
                                        instance.disk_template)
7149
      target_node = secondary_nodes[0]
7150
      if self.lu.op.iallocator or (self.lu.op.target_node and
7151
                                   self.lu.op.target_node != target_node):
7152
        if self.failover:
7153
          text = "failed over"
7154
        else:
7155
          text = "migrated"
7156
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7157
                                   " be %s to arbitrary nodes"
7158
                                   " (neither an iallocator nor a target"
7159
                                   " node can be passed)" %
7160
                                   (instance.disk_template, text),
7161
                                   errors.ECODE_INVAL)
7162

    
7163
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7164

    
7165
    # check memory requirements on the secondary node
7166
    if not self.failover or instance.admin_up:
7167
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7168
                           instance.name, i_be[constants.BE_MEMORY],
7169
                           instance.hypervisor)
7170
    else:
7171
      self.lu.LogInfo("Not checking memory on the secondary node as"
7172
                      " instance will not be started")
7173

    
7174
    # check bridge existance
7175
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7176

    
7177
    if not self.cleanup:
7178
      _CheckNodeNotDrained(self.lu, target_node)
7179
      if not self.failover:
7180
        result = self.rpc.call_instance_migratable(instance.primary_node,
7181
                                                   instance)
7182
        if result.fail_msg and self.fallback:
7183
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7184
                          " failover")
7185
          self.failover = True
7186
        else:
7187
          result.Raise("Can't migrate, please use failover",
7188
                       prereq=True, ecode=errors.ECODE_STATE)
7189

    
7190
    assert not (self.failover and self.cleanup)
7191

    
7192
    if not self.failover:
7193
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7194
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7195
                                   " parameters are accepted",
7196
                                   errors.ECODE_INVAL)
7197
      if self.lu.op.live is not None:
7198
        if self.lu.op.live:
7199
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7200
        else:
7201
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7202
        # reset the 'live' parameter to None so that repeated
7203
        # invocations of CheckPrereq do not raise an exception
7204
        self.lu.op.live = None
7205
      elif self.lu.op.mode is None:
7206
        # read the default value from the hypervisor
7207
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7208
                                                skip_globals=False)
7209
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7210

    
7211
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7212
    else:
7213
      # Failover is never live
7214
      self.live = False
7215

    
7216
  def _RunAllocator(self):
7217
    """Run the allocator based on input opcode.
7218

7219
    """
7220
    ial = IAllocator(self.cfg, self.rpc,
7221
                     mode=constants.IALLOCATOR_MODE_RELOC,
7222
                     name=self.instance_name,
7223
                     # TODO See why hail breaks with a single node below
7224
                     relocate_from=[self.instance.primary_node,
7225
                                    self.instance.primary_node],
7226
                     )
7227

    
7228
    ial.Run(self.lu.op.iallocator)
7229

    
7230
    if not ial.success:
7231
      raise errors.OpPrereqError("Can't compute nodes using"
7232
                                 " iallocator '%s': %s" %
7233
                                 (self.lu.op.iallocator, ial.info),
7234
                                 errors.ECODE_NORES)
7235
    if len(ial.result) != ial.required_nodes:
7236
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7237
                                 " of nodes (%s), required %s" %
7238
                                 (self.lu.op.iallocator, len(ial.result),
7239
                                  ial.required_nodes), errors.ECODE_FAULT)
7240
    self.target_node = ial.result[0]
7241
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7242
                 self.instance_name, self.lu.op.iallocator,
7243
                 utils.CommaJoin(ial.result))
7244

    
7245
  def _WaitUntilSync(self):
7246
    """Poll with custom rpc for disk sync.
7247

7248
    This uses our own step-based rpc call.
7249

7250
    """
7251
    self.feedback_fn("* wait until resync is done")
7252
    all_done = False
7253
    while not all_done:
7254
      all_done = True
7255
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7256
                                            self.nodes_ip,
7257
                                            self.instance.disks)
7258
      min_percent = 100
7259
      for node, nres in result.items():
7260
        nres.Raise("Cannot resync disks on node %s" % node)
7261
        node_done, node_percent = nres.payload
7262
        all_done = all_done and node_done
7263
        if node_percent is not None:
7264
          min_percent = min(min_percent, node_percent)
7265
      if not all_done:
7266
        if min_percent < 100:
7267
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7268
        time.sleep(2)
7269

    
7270
  def _EnsureSecondary(self, node):
7271
    """Demote a node to secondary.
7272

7273
    """
7274
    self.feedback_fn("* switching node %s to secondary mode" % node)
7275

    
7276
    for dev in self.instance.disks:
7277
      self.cfg.SetDiskID(dev, node)
7278

    
7279
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7280
                                          self.instance.disks)
7281
    result.Raise("Cannot change disk to secondary on node %s" % node)
7282

    
7283
  def _GoStandalone(self):
7284
    """Disconnect from the network.
7285

7286
    """
7287
    self.feedback_fn("* changing into standalone mode")
7288
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7289
                                               self.instance.disks)
7290
    for node, nres in result.items():
7291
      nres.Raise("Cannot disconnect disks node %s" % node)
7292

    
7293
  def _GoReconnect(self, multimaster):
7294
    """Reconnect to the network.
7295

7296
    """
7297
    if multimaster:
7298
      msg = "dual-master"
7299
    else:
7300
      msg = "single-master"
7301
    self.feedback_fn("* changing disks into %s mode" % msg)
7302
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7303
                                           self.instance.disks,
7304
                                           self.instance.name, multimaster)
7305
    for node, nres in result.items():
7306
      nres.Raise("Cannot change disks config on node %s" % node)
7307

    
7308
  def _ExecCleanup(self):
7309
    """Try to cleanup after a failed migration.
7310

7311
    The cleanup is done by:
7312
      - check that the instance is running only on one node
7313
        (and update the config if needed)
7314
      - change disks on its secondary node to secondary
7315
      - wait until disks are fully synchronized
7316
      - disconnect from the network
7317
      - change disks into single-master mode
7318
      - wait again until disks are fully synchronized
7319

7320
    """
7321
    instance = self.instance
7322
    target_node = self.target_node
7323
    source_node = self.source_node
7324

    
7325
    # check running on only one node
7326
    self.feedback_fn("* checking where the instance actually runs"
7327
                     " (if this hangs, the hypervisor might be in"
7328
                     " a bad state)")
7329
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7330
    for node, result in ins_l.items():
7331
      result.Raise("Can't contact node %s" % node)
7332

    
7333
    runningon_source = instance.name in ins_l[source_node].payload
7334
    runningon_target = instance.name in ins_l[target_node].payload
7335

    
7336
    if runningon_source and runningon_target:
7337
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7338
                               " or the hypervisor is confused; you will have"
7339
                               " to ensure manually that it runs only on one"
7340
                               " and restart this operation")
7341

    
7342
    if not (runningon_source or runningon_target):
7343
      raise errors.OpExecError("Instance does not seem to be running at all;"
7344
                               " in this case it's safer to repair by"
7345
                               " running 'gnt-instance stop' to ensure disk"
7346
                               " shutdown, and then restarting it")
7347

    
7348
    if runningon_target:
7349
      # the migration has actually succeeded, we need to update the config
7350
      self.feedback_fn("* instance running on secondary node (%s),"
7351
                       " updating config" % target_node)
7352
      instance.primary_node = target_node
7353
      self.cfg.Update(instance, self.feedback_fn)
7354
      demoted_node = source_node
7355
    else:
7356
      self.feedback_fn("* instance confirmed to be running on its"
7357
                       " primary node (%s)" % source_node)
7358
      demoted_node = target_node
7359

    
7360
    if instance.disk_template in constants.DTS_INT_MIRROR:
7361
      self._EnsureSecondary(demoted_node)
7362
      try:
7363
        self._WaitUntilSync()
7364
      except errors.OpExecError:
7365
        # we ignore here errors, since if the device is standalone, it
7366
        # won't be able to sync
7367
        pass
7368
      self._GoStandalone()
7369
      self._GoReconnect(False)
7370
      self._WaitUntilSync()
7371

    
7372
    self.feedback_fn("* done")
7373

    
7374
  def _RevertDiskStatus(self):
7375
    """Try to revert the disk status after a failed migration.
7376

7377
    """
7378
    target_node = self.target_node
7379
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7380
      return
7381

    
7382
    try:
7383
      self._EnsureSecondary(target_node)
7384
      self._GoStandalone()
7385
      self._GoReconnect(False)
7386
      self._WaitUntilSync()
7387
    except errors.OpExecError, err:
7388
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7389
                         " please try to recover the instance manually;"
7390
                         " error '%s'" % str(err))
7391

    
7392
  def _AbortMigration(self):
7393
    """Call the hypervisor code to abort a started migration.
7394

7395
    """
7396
    instance = self.instance
7397
    target_node = self.target_node
7398
    migration_info = self.migration_info
7399

    
7400
    abort_result = self.rpc.call_finalize_migration(target_node,
7401
                                                    instance,
7402
                                                    migration_info,
7403
                                                    False)
7404
    abort_msg = abort_result.fail_msg
7405
    if abort_msg:
7406
      logging.error("Aborting migration failed on target node %s: %s",
7407
                    target_node, abort_msg)
7408
      # Don't raise an exception here, as we stil have to try to revert the
7409
      # disk status, even if this step failed.
7410

    
7411
  def _ExecMigration(self):
7412
    """Migrate an instance.
7413

7414
    The migrate is done by:
7415
      - change the disks into dual-master mode
7416
      - wait until disks are fully synchronized again
7417
      - migrate the instance
7418
      - change disks on the new secondary node (the old primary) to secondary
7419
      - wait until disks are fully synchronized
7420
      - change disks into single-master mode
7421

7422
    """
7423
    instance = self.instance
7424
    target_node = self.target_node
7425
    source_node = self.source_node
7426

    
7427
    # Check for hypervisor version mismatch and warn the user.
7428
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7429
                                       None, self.instance.hypervisor)
7430
    src_info = nodeinfo[source_node]
7431
    dst_info = nodeinfo[target_node]
7432

    
7433
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7434
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7435
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7436
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7437
      if src_version != dst_version:
7438
        self.feedback_fn("* warning: hypervisor version mismatch between"
7439
                         " source (%s) and target (%s) node" %
7440
                         (src_version, dst_version))
7441

    
7442
    self.feedback_fn("* checking disk consistency between source and target")
7443
    for dev in instance.disks:
7444
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7445
        raise errors.OpExecError("Disk %s is degraded or not fully"
7446
                                 " synchronized on target node,"
7447
                                 " aborting migration" % dev.iv_name)
7448

    
7449
    # First get the migration information from the remote node
7450
    result = self.rpc.call_migration_info(source_node, instance)
7451
    msg = result.fail_msg
7452
    if msg:
7453
      log_err = ("Failed fetching source migration information from %s: %s" %
7454
                 (source_node, msg))
7455
      logging.error(log_err)
7456
      raise errors.OpExecError(log_err)
7457

    
7458
    self.migration_info = migration_info = result.payload
7459

    
7460
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7461
      # Then switch the disks to master/master mode
7462
      self._EnsureSecondary(target_node)
7463
      self._GoStandalone()
7464
      self._GoReconnect(True)
7465
      self._WaitUntilSync()
7466

    
7467
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7468
    result = self.rpc.call_accept_instance(target_node,
7469
                                           instance,
7470
                                           migration_info,
7471
                                           self.nodes_ip[target_node])
7472

    
7473
    msg = result.fail_msg
7474
    if msg:
7475
      logging.error("Instance pre-migration failed, trying to revert"
7476
                    " disk status: %s", msg)
7477
      self.feedback_fn("Pre-migration failed, aborting")
7478
      self._AbortMigration()
7479
      self._RevertDiskStatus()
7480
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7481
                               (instance.name, msg))
7482

    
7483
    self.feedback_fn("* migrating instance to %s" % target_node)
7484
    result = self.rpc.call_instance_migrate(source_node, instance,
7485
                                            self.nodes_ip[target_node],
7486
                                            self.live)
7487
    msg = result.fail_msg
7488
    if msg:
7489
      logging.error("Instance migration failed, trying to revert"
7490
                    " disk status: %s", msg)
7491
      self.feedback_fn("Migration failed, aborting")
7492
      self._AbortMigration()
7493
      self._RevertDiskStatus()
7494
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7495
                               (instance.name, msg))
7496

    
7497
    instance.primary_node = target_node
7498
    # distribute new instance config to the other nodes
7499
    self.cfg.Update(instance, self.feedback_fn)
7500

    
7501
    result = self.rpc.call_finalize_migration(target_node,
7502
                                              instance,
7503
                                              migration_info,
7504
                                              True)
7505
    msg = result.fail_msg
7506
    if msg:
7507
      logging.error("Instance migration succeeded, but finalization failed:"
7508
                    " %s", msg)
7509
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7510
                               msg)
7511

    
7512
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7513
      self._EnsureSecondary(source_node)
7514
      self._WaitUntilSync()
7515
      self._GoStandalone()
7516
      self._GoReconnect(False)
7517
      self._WaitUntilSync()
7518

    
7519
    self.feedback_fn("* done")
7520

    
7521
  def _ExecFailover(self):
7522
    """Failover an instance.
7523

7524
    The failover is done by shutting it down on its present node and
7525
    starting it on the secondary.
7526

7527
    """
7528
    instance = self.instance
7529
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7530

    
7531
    source_node = instance.primary_node
7532
    target_node = self.target_node
7533

    
7534
    if instance.admin_up:
7535
      self.feedback_fn("* checking disk consistency between source and target")
7536
      for dev in instance.disks:
7537
        # for drbd, these are drbd over lvm
7538
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7539
          if primary_node.offline:
7540
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7541
                             " target node %s" %
7542
                             (primary_node.name, dev.iv_name, target_node))
7543
          elif not self.ignore_consistency:
7544
            raise errors.OpExecError("Disk %s is degraded on target node,"
7545
                                     " aborting failover" % dev.iv_name)
7546
    else:
7547
      self.feedback_fn("* not checking disk consistency as instance is not"
7548
                       " running")
7549

    
7550
    self.feedback_fn("* shutting down instance on source node")
7551
    logging.info("Shutting down instance %s on node %s",
7552
                 instance.name, source_node)
7553

    
7554
    result = self.rpc.call_instance_shutdown(source_node, instance,
7555
                                             self.shutdown_timeout)
7556
    msg = result.fail_msg
7557
    if msg:
7558
      if self.ignore_consistency or primary_node.offline:
7559
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7560
                           " proceeding anyway; please make sure node"
7561
                           " %s is down; error details: %s",
7562
                           instance.name, source_node, source_node, msg)
7563
      else:
7564
        raise errors.OpExecError("Could not shutdown instance %s on"
7565
                                 " node %s: %s" %
7566
                                 (instance.name, source_node, msg))
7567

    
7568
    self.feedback_fn("* deactivating the instance's disks on source node")
7569
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7570
      raise errors.OpExecError("Can't shut down the instance's disks")
7571

    
7572
    instance.primary_node = target_node
7573
    # distribute new instance config to the other nodes
7574
    self.cfg.Update(instance, self.feedback_fn)
7575

    
7576
    # Only start the instance if it's marked as up
7577
    if instance.admin_up:
7578
      self.feedback_fn("* activating the instance's disks on target node %s" %
7579
                       target_node)
7580
      logging.info("Starting instance %s on node %s",
7581
                   instance.name, target_node)
7582

    
7583
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7584
                                           ignore_secondaries=True)
7585
      if not disks_ok:
7586
        _ShutdownInstanceDisks(self.lu, instance)
7587
        raise errors.OpExecError("Can't activate the instance's disks")
7588

    
7589
      self.feedback_fn("* starting the instance on the target node %s" %
7590
                       target_node)
7591
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7592
                                            False)
7593
      msg = result.fail_msg
7594
      if msg:
7595
        _ShutdownInstanceDisks(self.lu, instance)
7596
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7597
                                 (instance.name, target_node, msg))
7598

    
7599
  def Exec(self, feedback_fn):
7600
    """Perform the migration.
7601

7602
    """
7603
    self.feedback_fn = feedback_fn
7604
    self.source_node = self.instance.primary_node
7605

    
7606
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7607
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7608
      self.target_node = self.instance.secondary_nodes[0]
7609
      # Otherwise self.target_node has been populated either
7610
      # directly, or through an iallocator.
7611

    
7612
    self.all_nodes = [self.source_node, self.target_node]
7613
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7614
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7615

    
7616
    if self.failover:
7617
      feedback_fn("Failover instance %s" % self.instance.name)
7618
      self._ExecFailover()
7619
    else:
7620
      feedback_fn("Migrating instance %s" % self.instance.name)
7621

    
7622
      if self.cleanup:
7623
        return self._ExecCleanup()
7624
      else:
7625
        return self._ExecMigration()
7626

    
7627

    
7628
def _CreateBlockDev(lu, node, instance, device, force_create,
7629
                    info, force_open):
7630
  """Create a tree of block devices on a given node.
7631

7632
  If this device type has to be created on secondaries, create it and
7633
  all its children.
7634

7635
  If not, just recurse to children keeping the same 'force' value.
7636

7637
  @param lu: the lu on whose behalf we execute
7638
  @param node: the node on which to create the device
7639
  @type instance: L{objects.Instance}
7640
  @param instance: the instance which owns the device
7641
  @type device: L{objects.Disk}
7642
  @param device: the device to create
7643
  @type force_create: boolean
7644
  @param force_create: whether to force creation of this device; this
7645
      will be change to True whenever we find a device which has
7646
      CreateOnSecondary() attribute
7647
  @param info: the extra 'metadata' we should attach to the device
7648
      (this will be represented as a LVM tag)
7649
  @type force_open: boolean
7650
  @param force_open: this parameter will be passes to the
7651
      L{backend.BlockdevCreate} function where it specifies
7652
      whether we run on primary or not, and it affects both
7653
      the child assembly and the device own Open() execution
7654

7655
  """
7656
  if device.CreateOnSecondary():
7657
    force_create = True
7658

    
7659
  if device.children:
7660
    for child in device.children:
7661
      _CreateBlockDev(lu, node, instance, child, force_create,
7662
                      info, force_open)
7663

    
7664
  if not force_create:
7665
    return
7666

    
7667
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7668

    
7669

    
7670
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7671
  """Create a single block device on a given node.
7672

7673
  This will not recurse over children of the device, so they must be
7674
  created in advance.
7675

7676
  @param lu: the lu on whose behalf we execute
7677
  @param node: the node on which to create the device
7678
  @type instance: L{objects.Instance}
7679
  @param instance: the instance which owns the device
7680
  @type device: L{objects.Disk}
7681
  @param device: the device to create
7682
  @param info: the extra 'metadata' we should attach to the device
7683
      (this will be represented as a LVM tag)
7684
  @type force_open: boolean
7685
  @param force_open: this parameter will be passes to the
7686
      L{backend.BlockdevCreate} function where it specifies
7687
      whether we run on primary or not, and it affects both
7688
      the child assembly and the device own Open() execution
7689

7690
  """
7691
  lu.cfg.SetDiskID(device, node)
7692
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7693
                                       instance.name, force_open, info)
7694
  result.Raise("Can't create block device %s on"
7695
               " node %s for instance %s" % (device, node, instance.name))
7696
  if device.physical_id is None:
7697
    device.physical_id = result.payload
7698

    
7699

    
7700
def _GenerateUniqueNames(lu, exts):
7701
  """Generate a suitable LV name.
7702

7703
  This will generate a logical volume name for the given instance.
7704

7705
  """
7706
  results = []
7707
  for val in exts:
7708
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7709
    results.append("%s%s" % (new_id, val))
7710
  return results
7711

    
7712

    
7713
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7714
                         iv_name, p_minor, s_minor):
7715
  """Generate a drbd8 device complete with its children.
7716

7717
  """
7718
  assert len(vgnames) == len(names) == 2
7719
  port = lu.cfg.AllocatePort()
7720
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7721
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7722
                          logical_id=(vgnames[0], names[0]))
7723
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7724
                          logical_id=(vgnames[1], names[1]))
7725
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7726
                          logical_id=(primary, secondary, port,
7727
                                      p_minor, s_minor,
7728
                                      shared_secret),
7729
                          children=[dev_data, dev_meta],
7730
                          iv_name=iv_name)
7731
  return drbd_dev
7732

    
7733

    
7734
def _GenerateDiskTemplate(lu, template_name,
7735
                          instance_name, primary_node,
7736
                          secondary_nodes, disk_info,
7737
                          file_storage_dir, file_driver,
7738
                          base_index, feedback_fn):
7739
  """Generate the entire disk layout for a given template type.
7740

7741
  """
7742
  #TODO: compute space requirements
7743

    
7744
  vgname = lu.cfg.GetVGName()
7745
  disk_count = len(disk_info)
7746
  disks = []
7747
  if template_name == constants.DT_DISKLESS:
7748
    pass
7749
  elif template_name == constants.DT_PLAIN:
7750
    if len(secondary_nodes) != 0:
7751
      raise errors.ProgrammerError("Wrong template configuration")
7752

    
7753
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7754
                                      for i in range(disk_count)])
7755
    for idx, disk in enumerate(disk_info):
7756
      disk_index = idx + base_index
7757
      vg = disk.get(constants.IDISK_VG, vgname)
7758
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7759
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7760
                              size=disk[constants.IDISK_SIZE],
7761
                              logical_id=(vg, names[idx]),
7762
                              iv_name="disk/%d" % disk_index,
7763
                              mode=disk[constants.IDISK_MODE])
7764
      disks.append(disk_dev)
7765
  elif template_name == constants.DT_DRBD8:
7766
    if len(secondary_nodes) != 1:
7767
      raise errors.ProgrammerError("Wrong template configuration")
7768
    remote_node = secondary_nodes[0]
7769
    minors = lu.cfg.AllocateDRBDMinor(
7770
      [primary_node, remote_node] * len(disk_info), instance_name)
7771

    
7772
    names = []
7773
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7774
                                               for i in range(disk_count)]):
7775
      names.append(lv_prefix + "_data")
7776
      names.append(lv_prefix + "_meta")
7777
    for idx, disk in enumerate(disk_info):
7778
      disk_index = idx + base_index
7779
      data_vg = disk.get(constants.IDISK_VG, vgname)
7780
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7781
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7782
                                      disk[constants.IDISK_SIZE],
7783
                                      [data_vg, meta_vg],
7784
                                      names[idx * 2:idx * 2 + 2],
7785
                                      "disk/%d" % disk_index,
7786
                                      minors[idx * 2], minors[idx * 2 + 1])
7787
      disk_dev.mode = disk[constants.IDISK_MODE]
7788
      disks.append(disk_dev)
7789
  elif template_name == constants.DT_FILE:
7790
    if len(secondary_nodes) != 0:
7791
      raise errors.ProgrammerError("Wrong template configuration")
7792

    
7793
    opcodes.RequireFileStorage()
7794

    
7795
    for idx, disk in enumerate(disk_info):
7796
      disk_index = idx + base_index
7797
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7798
                              size=disk[constants.IDISK_SIZE],
7799
                              iv_name="disk/%d" % disk_index,
7800
                              logical_id=(file_driver,
7801
                                          "%s/disk%d" % (file_storage_dir,
7802
                                                         disk_index)),
7803
                              mode=disk[constants.IDISK_MODE])
7804
      disks.append(disk_dev)
7805
  elif template_name == constants.DT_SHARED_FILE:
7806
    if len(secondary_nodes) != 0:
7807
      raise errors.ProgrammerError("Wrong template configuration")
7808

    
7809
    opcodes.RequireSharedFileStorage()
7810

    
7811
    for idx, disk in enumerate(disk_info):
7812
      disk_index = idx + base_index
7813
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7814
                              size=disk[constants.IDISK_SIZE],
7815
                              iv_name="disk/%d" % disk_index,
7816
                              logical_id=(file_driver,
7817
                                          "%s/disk%d" % (file_storage_dir,
7818
                                                         disk_index)),
7819
                              mode=disk[constants.IDISK_MODE])
7820
      disks.append(disk_dev)
7821
  elif template_name == constants.DT_BLOCK:
7822
    if len(secondary_nodes) != 0:
7823
      raise errors.ProgrammerError("Wrong template configuration")
7824

    
7825
    for idx, disk in enumerate(disk_info):
7826
      disk_index = idx + base_index
7827
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7828
                              size=disk[constants.IDISK_SIZE],
7829
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7830
                                          disk[constants.IDISK_ADOPT]),
7831
                              iv_name="disk/%d" % disk_index,
7832
                              mode=disk[constants.IDISK_MODE])
7833
      disks.append(disk_dev)
7834

    
7835
  else:
7836
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7837
  return disks
7838

    
7839

    
7840
def _GetInstanceInfoText(instance):
7841
  """Compute that text that should be added to the disk's metadata.
7842

7843
  """
7844
  return "originstname+%s" % instance.name
7845

    
7846

    
7847
def _CalcEta(time_taken, written, total_size):
7848
  """Calculates the ETA based on size written and total size.
7849

7850
  @param time_taken: The time taken so far
7851
  @param written: amount written so far
7852
  @param total_size: The total size of data to be written
7853
  @return: The remaining time in seconds
7854

7855
  """
7856
  avg_time = time_taken / float(written)
7857
  return (total_size - written) * avg_time
7858

    
7859

    
7860
def _WipeDisks(lu, instance):
7861
  """Wipes instance disks.
7862

7863
  @type lu: L{LogicalUnit}
7864
  @param lu: the logical unit on whose behalf we execute
7865
  @type instance: L{objects.Instance}
7866
  @param instance: the instance whose disks we should create
7867
  @return: the success of the wipe
7868

7869
  """
7870
  node = instance.primary_node
7871

    
7872
  for device in instance.disks:
7873
    lu.cfg.SetDiskID(device, node)
7874

    
7875
  logging.info("Pause sync of instance %s disks", instance.name)
7876
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7877

    
7878
  for idx, success in enumerate(result.payload):
7879
    if not success:
7880
      logging.warn("pause-sync of instance %s for disks %d failed",
7881
                   instance.name, idx)
7882

    
7883
  try:
7884
    for idx, device in enumerate(instance.disks):
7885
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7886
      # MAX_WIPE_CHUNK at max
7887
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7888
                            constants.MIN_WIPE_CHUNK_PERCENT)
7889
      # we _must_ make this an int, otherwise rounding errors will
7890
      # occur
7891
      wipe_chunk_size = int(wipe_chunk_size)
7892

    
7893
      lu.LogInfo("* Wiping disk %d", idx)
7894
      logging.info("Wiping disk %d for instance %s, node %s using"
7895
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7896

    
7897
      offset = 0
7898
      size = device.size
7899
      last_output = 0
7900
      start_time = time.time()
7901

    
7902
      while offset < size:
7903
        wipe_size = min(wipe_chunk_size, size - offset)
7904
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7905
                      idx, offset, wipe_size)
7906
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7907
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7908
                     (idx, offset, wipe_size))
7909
        now = time.time()
7910
        offset += wipe_size
7911
        if now - last_output >= 60:
7912
          eta = _CalcEta(now - start_time, offset, size)
7913
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7914
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7915
          last_output = now
7916
  finally:
7917
    logging.info("Resume sync of instance %s disks", instance.name)
7918

    
7919
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7920

    
7921
    for idx, success in enumerate(result.payload):
7922
      if not success:
7923
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7924
                      " look at the status and troubleshoot the issue", idx)
7925
        logging.warn("resume-sync of instance %s for disks %d failed",
7926
                     instance.name, idx)
7927

    
7928

    
7929
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7930
  """Create all disks for an instance.
7931

7932
  This abstracts away some work from AddInstance.
7933

7934
  @type lu: L{LogicalUnit}
7935
  @param lu: the logical unit on whose behalf we execute
7936
  @type instance: L{objects.Instance}
7937
  @param instance: the instance whose disks we should create
7938
  @type to_skip: list
7939
  @param to_skip: list of indices to skip
7940
  @type target_node: string
7941
  @param target_node: if passed, overrides the target node for creation
7942
  @rtype: boolean
7943
  @return: the success of the creation
7944

7945
  """
7946
  info = _GetInstanceInfoText(instance)
7947
  if target_node is None:
7948
    pnode = instance.primary_node
7949
    all_nodes = instance.all_nodes
7950
  else:
7951
    pnode = target_node
7952
    all_nodes = [pnode]
7953

    
7954
  if instance.disk_template in constants.DTS_FILEBASED:
7955
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7956
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7957

    
7958
    result.Raise("Failed to create directory '%s' on"
7959
                 " node %s" % (file_storage_dir, pnode))
7960

    
7961
  # Note: this needs to be kept in sync with adding of disks in
7962
  # LUInstanceSetParams
7963
  for idx, device in enumerate(instance.disks):
7964
    if to_skip and idx in to_skip:
7965
      continue
7966
    logging.info("Creating volume %s for instance %s",
7967
                 device.iv_name, instance.name)
7968
    #HARDCODE
7969
    for node in all_nodes:
7970
      f_create = node == pnode
7971
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7972

    
7973

    
7974
def _RemoveDisks(lu, instance, target_node=None):
7975
  """Remove all disks for an instance.
7976

7977
  This abstracts away some work from `AddInstance()` and
7978
  `RemoveInstance()`. Note that in case some of the devices couldn't
7979
  be removed, the removal will continue with the other ones (compare
7980
  with `_CreateDisks()`).
7981

7982
  @type lu: L{LogicalUnit}
7983
  @param lu: the logical unit on whose behalf we execute
7984
  @type instance: L{objects.Instance}
7985
  @param instance: the instance whose disks we should remove
7986
  @type target_node: string
7987
  @param target_node: used to override the node on which to remove the disks
7988
  @rtype: boolean
7989
  @return: the success of the removal
7990

7991
  """
7992
  logging.info("Removing block devices for instance %s", instance.name)
7993

    
7994
  all_result = True
7995
  for device in instance.disks:
7996
    if target_node:
7997
      edata = [(target_node, device)]
7998
    else:
7999
      edata = device.ComputeNodeTree(instance.primary_node)
8000
    for node, disk in edata:
8001
      lu.cfg.SetDiskID(disk, node)
8002
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8003
      if msg:
8004
        lu.LogWarning("Could not remove block device %s on node %s,"
8005
                      " continuing anyway: %s", device.iv_name, node, msg)
8006
        all_result = False
8007

    
8008
    # if this is a DRBD disk, return its port to the pool
8009
    if device.dev_type in constants.LDS_DRBD:
8010
      tcp_port = device.logical_id[2]
8011
      lu.cfg.AddTcpUdpPort(tcp_port)
8012

    
8013
  if instance.disk_template == constants.DT_FILE:
8014
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8015
    if target_node:
8016
      tgt = target_node
8017
    else:
8018
      tgt = instance.primary_node
8019
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8020
    if result.fail_msg:
8021
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8022
                    file_storage_dir, instance.primary_node, result.fail_msg)
8023
      all_result = False
8024

    
8025
  return all_result
8026

    
8027

    
8028
def _ComputeDiskSizePerVG(disk_template, disks):
8029
  """Compute disk size requirements in the volume group
8030

8031
  """
8032
  def _compute(disks, payload):
8033
    """Universal algorithm.
8034

8035
    """
8036
    vgs = {}
8037
    for disk in disks:
8038
      vgs[disk[constants.IDISK_VG]] = \
8039
        vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
8040

    
8041
    return vgs
8042

    
8043
  # Required free disk space as a function of disk and swap space
8044
  req_size_dict = {
8045
    constants.DT_DISKLESS: {},
8046
    constants.DT_PLAIN: _compute(disks, 0),
8047
    # 128 MB are added for drbd metadata for each disk
8048
    constants.DT_DRBD8: _compute(disks, 128),
8049
    constants.DT_FILE: {},
8050
    constants.DT_SHARED_FILE: {},
8051
  }
8052

    
8053
  if disk_template not in req_size_dict:
8054
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8055
                                 " is unknown" % disk_template)
8056

    
8057
  return req_size_dict[disk_template]
8058

    
8059

    
8060
def _ComputeDiskSize(disk_template, disks):
8061
  """Compute disk size requirements in the volume group
8062

8063
  """
8064
  # Required free disk space as a function of disk and swap space
8065
  req_size_dict = {
8066
    constants.DT_DISKLESS: None,
8067
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8068
    # 128 MB are added for drbd metadata for each disk
8069
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8070
    constants.DT_FILE: None,
8071
    constants.DT_SHARED_FILE: 0,
8072
    constants.DT_BLOCK: 0,
8073
  }
8074

    
8075
  if disk_template not in req_size_dict:
8076
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8077
                                 " is unknown" % disk_template)
8078

    
8079
  return req_size_dict[disk_template]
8080

    
8081

    
8082
def _FilterVmNodes(lu, nodenames):
8083
  """Filters out non-vm_capable nodes from a list.
8084

8085
  @type lu: L{LogicalUnit}
8086
  @param lu: the logical unit for which we check
8087
  @type nodenames: list
8088
  @param nodenames: the list of nodes on which we should check
8089
  @rtype: list
8090
  @return: the list of vm-capable nodes
8091

8092
  """
8093
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8094
  return [name for name in nodenames if name not in vm_nodes]
8095

    
8096

    
8097
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8098
  """Hypervisor parameter validation.
8099

8100
  This function abstract the hypervisor parameter validation to be
8101
  used in both instance create and instance modify.
8102

8103
  @type lu: L{LogicalUnit}
8104
  @param lu: the logical unit for which we check
8105
  @type nodenames: list
8106
  @param nodenames: the list of nodes on which we should check
8107
  @type hvname: string
8108
  @param hvname: the name of the hypervisor we should use
8109
  @type hvparams: dict
8110
  @param hvparams: the parameters which we need to check
8111
  @raise errors.OpPrereqError: if the parameters are not valid
8112

8113
  """
8114
  nodenames = _FilterVmNodes(lu, nodenames)
8115
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8116
                                                  hvname,
8117
                                                  hvparams)
8118
  for node in nodenames:
8119
    info = hvinfo[node]
8120
    if info.offline:
8121
      continue
8122
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8123

    
8124

    
8125
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8126
  """OS parameters validation.
8127

8128
  @type lu: L{LogicalUnit}
8129
  @param lu: the logical unit for which we check
8130
  @type required: boolean
8131
  @param required: whether the validation should fail if the OS is not
8132
      found
8133
  @type nodenames: list
8134
  @param nodenames: the list of nodes on which we should check
8135
  @type osname: string
8136
  @param osname: the name of the hypervisor we should use
8137
  @type osparams: dict
8138
  @param osparams: the parameters which we need to check
8139
  @raise errors.OpPrereqError: if the parameters are not valid
8140

8141
  """
8142
  nodenames = _FilterVmNodes(lu, nodenames)
8143
  result = lu.rpc.call_os_validate(required, nodenames, osname,
8144
                                   [constants.OS_VALIDATE_PARAMETERS],
8145
                                   osparams)
8146
  for node, nres in result.items():
8147
    # we don't check for offline cases since this should be run only
8148
    # against the master node and/or an instance's nodes
8149
    nres.Raise("OS Parameters validation failed on node %s" % node)
8150
    if not nres.payload:
8151
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8152
                 osname, node)
8153

    
8154

    
8155
class LUInstanceCreate(LogicalUnit):
8156
  """Create an instance.
8157

8158
  """
8159
  HPATH = "instance-add"
8160
  HTYPE = constants.HTYPE_INSTANCE
8161
  REQ_BGL = False
8162

    
8163
  def CheckArguments(self):
8164
    """Check arguments.
8165

8166
    """
8167
    # do not require name_check to ease forward/backward compatibility
8168
    # for tools
8169
    if self.op.no_install and self.op.start:
8170
      self.LogInfo("No-installation mode selected, disabling startup")
8171
      self.op.start = False
8172
    # validate/normalize the instance name
8173
    self.op.instance_name = \
8174
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8175

    
8176
    if self.op.ip_check and not self.op.name_check:
8177
      # TODO: make the ip check more flexible and not depend on the name check
8178
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8179
                                 " check", errors.ECODE_INVAL)
8180

    
8181
    # check nics' parameter names
8182
    for nic in self.op.nics:
8183
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8184

    
8185
    # check disks. parameter names and consistent adopt/no-adopt strategy
8186
    has_adopt = has_no_adopt = False
8187
    for disk in self.op.disks:
8188
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8189
      if constants.IDISK_ADOPT in disk:
8190
        has_adopt = True
8191
      else:
8192
        has_no_adopt = True
8193
    if has_adopt and has_no_adopt:
8194
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8195
                                 errors.ECODE_INVAL)
8196
    if has_adopt:
8197
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8198
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8199
                                   " '%s' disk template" %
8200
                                   self.op.disk_template,
8201
                                   errors.ECODE_INVAL)
8202
      if self.op.iallocator is not None:
8203
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8204
                                   " iallocator script", errors.ECODE_INVAL)
8205
      if self.op.mode == constants.INSTANCE_IMPORT:
8206
        raise errors.OpPrereqError("Disk adoption not allowed for"
8207
                                   " instance import", errors.ECODE_INVAL)
8208
    else:
8209
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8210
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8211
                                   " but no 'adopt' parameter given" %
8212
                                   self.op.disk_template,
8213
                                   errors.ECODE_INVAL)
8214

    
8215
    self.adopt_disks = has_adopt
8216

    
8217
    # instance name verification
8218
    if self.op.name_check:
8219
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8220
      self.op.instance_name = self.hostname1.name
8221
      # used in CheckPrereq for ip ping check
8222
      self.check_ip = self.hostname1.ip
8223
    else:
8224
      self.check_ip = None
8225

    
8226
    # file storage checks
8227
    if (self.op.file_driver and
8228
        not self.op.file_driver in constants.FILE_DRIVER):
8229
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8230
                                 self.op.file_driver, errors.ECODE_INVAL)
8231

    
8232
    if self.op.disk_template == constants.DT_FILE:
8233
      opcodes.RequireFileStorage()
8234
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8235
      opcodes.RequireSharedFileStorage()
8236

    
8237
    ### Node/iallocator related checks
8238
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8239

    
8240
    if self.op.pnode is not None:
8241
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8242
        if self.op.snode is None:
8243
          raise errors.OpPrereqError("The networked disk templates need"
8244
                                     " a mirror node", errors.ECODE_INVAL)
8245
      elif self.op.snode:
8246
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8247
                        " template")
8248
        self.op.snode = None
8249

    
8250
    self._cds = _GetClusterDomainSecret()
8251

    
8252
    if self.op.mode == constants.INSTANCE_IMPORT:
8253
      # On import force_variant must be True, because if we forced it at
8254
      # initial install, our only chance when importing it back is that it
8255
      # works again!
8256
      self.op.force_variant = True
8257

    
8258
      if self.op.no_install:
8259
        self.LogInfo("No-installation mode has no effect during import")
8260

    
8261
    elif self.op.mode == constants.INSTANCE_CREATE:
8262
      if self.op.os_type is None:
8263
        raise errors.OpPrereqError("No guest OS specified",
8264
                                   errors.ECODE_INVAL)
8265
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8266
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8267
                                   " installation" % self.op.os_type,
8268
                                   errors.ECODE_STATE)
8269
      if self.op.disk_template is None:
8270
        raise errors.OpPrereqError("No disk template specified",
8271
                                   errors.ECODE_INVAL)
8272

    
8273
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8274
      # Check handshake to ensure both clusters have the same domain secret
8275
      src_handshake = self.op.source_handshake
8276
      if not src_handshake:
8277
        raise errors.OpPrereqError("Missing source handshake",
8278
                                   errors.ECODE_INVAL)
8279

    
8280
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8281
                                                           src_handshake)
8282
      if errmsg:
8283
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8284
                                   errors.ECODE_INVAL)
8285

    
8286
      # Load and check source CA
8287
      self.source_x509_ca_pem = self.op.source_x509_ca
8288
      if not self.source_x509_ca_pem:
8289
        raise errors.OpPrereqError("Missing source X509 CA",
8290
                                   errors.ECODE_INVAL)
8291

    
8292
      try:
8293
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8294
                                                    self._cds)
8295
      except OpenSSL.crypto.Error, err:
8296
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8297
                                   (err, ), errors.ECODE_INVAL)
8298

    
8299
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8300
      if errcode is not None:
8301
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8302
                                   errors.ECODE_INVAL)
8303

    
8304
      self.source_x509_ca = cert
8305

    
8306
      src_instance_name = self.op.source_instance_name
8307
      if not src_instance_name:
8308
        raise errors.OpPrereqError("Missing source instance name",
8309
                                   errors.ECODE_INVAL)
8310

    
8311
      self.source_instance_name = \
8312
          netutils.GetHostname(name=src_instance_name).name
8313

    
8314
    else:
8315
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8316
                                 self.op.mode, errors.ECODE_INVAL)
8317

    
8318
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
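    # a secondary node is only expected for mirrored disk templates (e.g. DRBD8)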
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link
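      # validate against the cluster-filled defaults, but store only the
      # parameters that were explicitly requested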

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries
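    # when adopting existing volumes nothing new has to be allocated, so the
    # free disk space check is replaced by checks on the adoption data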

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
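    # hypervisors in HTS_REQ_PORT need a cluster-wide unique network port
    # reserved for the instance (e.g. for its remote console)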
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)
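    # wipe the newly created disks only if the cluster-wide
    # prealloc_wipe_disks option is set (adopted disks are never wiped)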

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj,
                                            None, None, False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)
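    # the payload lists the instances currently running on that node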

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
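    # the actual disk replacement work is delegated to the TLReplaceDisks
    # tasklet below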

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=list(relocate_from))

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)
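        # an offline node is skipped; a failed or missing block device means
        # the disks are not (fully) activated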

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
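      # find out which side (if any) actually has faulty disks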
9524
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9525
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9526

    
9527
      if faulty_primary and faulty_secondary:
9528
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9529
                                   " one node and can not be repaired"
9530
                                   " automatically" % self.instance_name,
9531
                                   errors.ECODE_STATE)
9532

    
9533
      if faulty_primary:
9534
        self.disks = faulty_primary
9535
        self.target_node = instance.primary_node
9536
        self.other_node = secondary_node
9537
        check_nodes = [self.target_node, self.other_node]
9538
      elif faulty_secondary:
9539
        self.disks = faulty_secondary
9540
        self.target_node = secondary_node
9541
        self.other_node = instance.primary_node
9542
        check_nodes = [self.target_node, self.other_node]
9543
      else:
9544
        self.disks = []
9545
        check_nodes = []
9546

    
9547
    else:
9548
      # Non-automatic modes
9549
      if self.mode == constants.REPLACE_DISK_PRI:
9550
        self.target_node = instance.primary_node
9551
        self.other_node = secondary_node
9552
        check_nodes = [self.target_node, self.other_node]
9553

    
9554
      elif self.mode == constants.REPLACE_DISK_SEC:
9555
        self.target_node = secondary_node
9556
        self.other_node = instance.primary_node
9557
        check_nodes = [self.target_node, self.other_node]
9558

    
9559
      elif self.mode == constants.REPLACE_DISK_CHG:
9560
        self.new_node = remote_node
9561
        self.other_node = instance.primary_node
9562
        self.target_node = secondary_node
9563
        check_nodes = [self.new_node, self.other_node]
9564

    
9565
        _CheckNodeNotDrained(self.lu, remote_node)
9566
        _CheckNodeVmCapable(self.lu, remote_node)
9567

    
9568
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9569
        assert old_node_info is not None
9570
        if old_node_info.offline and not self.early_release:
9571
          # doesn't make sense to delay the release
9572
          self.early_release = True
9573
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9574
                          " early-release mode", secondary_node)
9575

    
9576
      else:
9577
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9578
                                     self.mode)
9579

    
9580
      # If not specified all disks should be replaced
9581
      if not self.disks:
9582
        self.disks = range(len(self.instance.disks))
9583

    
9584
    for node in check_nodes:
9585
      _CheckNodeOnline(self.lu, node)
9586

    
9587
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9588
                                                          self.other_node,
9589
                                                          self.target_node]
9590
                              if node_name is not None)
9591

    
9592
    # Release unneeded node locks
9593
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9594

    
9595
    # Release any owned node group
9596
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9597
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9598

    
9599
    # Check whether disks are valid
9600
    for disk_idx in self.disks:
9601
      instance.FindDisk(disk_idx)
9602

    
9603
    # Get secondary node IP addresses
9604
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9605
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9606

    
9607
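  # Illustrative note (not part of the original code): after the prerequisite
  # checks above, the replace mode determines which node plays which role for
  # Exec below.  Assuming a primary "node1" and a secondary "node2":
  #   - REPLACE_DISK_PRI: target_node=node1, other_node=node2, new_node=None
  #   - REPLACE_DISK_SEC: target_node=node2, other_node=node1, new_node=None
  #   - REPLACE_DISK_CHG: target_node=node2 (old secondary), other_node=node1,
  #     new_node=<remote node>, and Exec dispatches to _ExecDrbd8Secondary
  # The node names here are made up for illustration only.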
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_nodes, self.node_secondary_ip.keys()))

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
9669
    self.lu.LogInfo("Checking volume groups")
9670

    
9671
    vgname = self.cfg.GetVGName()
9672

    
9673
    # Make sure volume group exists on all involved nodes
9674
    results = self.rpc.call_vg_list(nodes)
9675
    if not results:
9676
      raise errors.OpExecError("Can't list volume groups on the nodes")
9677

    
9678
    for node in nodes:
9679
      res = results[node]
9680
      res.Raise("Error checking node %s" % node)
9681
      if vgname not in res.payload:
9682
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9683
                                 (vgname, node))
9684

    
9685
  def _CheckDisksExistence(self, nodes):
9686
    # Check disk existence
9687
    for idx, dev in enumerate(self.instance.disks):
9688
      if idx not in self.disks:
9689
        continue
9690

    
9691
      for node in nodes:
9692
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9693
        self.cfg.SetDiskID(dev, node)
9694

    
9695
        result = self.rpc.call_blockdev_find(node, dev)
9696

    
9697
        msg = result.fail_msg
9698
        if msg or not result.payload:
9699
          if not msg:
9700
            msg = "disk not found"
9701
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9702
                                   (idx, node, msg))
9703

    
9704
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9705
    for idx, dev in enumerate(self.instance.disks):
9706
      if idx not in self.disks:
9707
        continue
9708

    
9709
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9710
                      (idx, node_name))
9711

    
9712
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9713
                                   ldisk=ldisk):
9714
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9715
                                 " replace disks for instance %s" %
9716
                                 (node_name, self.instance.name))
9717

    
9718
  def _CreateNewStorage(self, node_name):
9719
    """Create new storage on the primary or secondary node.
9720

9721
    This is only used for same-node replaces, not for changing the
9722
    secondary node, hence we don't want to modify the existing disk.
9723

9724
    """
9725
    iv_names = {}
9726

    
9727
    for idx, dev in enumerate(self.instance.disks):
9728
      if idx not in self.disks:
9729
        continue
9730

    
9731
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9732

    
9733
      self.cfg.SetDiskID(dev, node_name)
9734

    
9735
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9736
      names = _GenerateUniqueNames(self.lu, lv_names)
9737

    
9738
      vg_data = dev.children[0].logical_id[0]
9739
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9740
                             logical_id=(vg_data, names[0]))
9741
      vg_meta = dev.children[1].logical_id[0]
9742
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9743
                             logical_id=(vg_meta, names[1]))
9744

    
9745
      new_lvs = [lv_data, lv_meta]
9746
      old_lvs = [child.Copy() for child in dev.children]
9747
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9748

    
9749
      # we pass force_create=True to force the LVM creation
9750
      for new_lv in new_lvs:
9751
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9752
                        _GetInstanceInfoText(self.instance), False)
9753

    
9754
    return iv_names
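  # Illustrative note (not part of the original code): in the disk-only
  # replace path, the iv_names mapping built by _CreateNewStorage above and
  # consumed by _CheckDevices and _RemoveOldStorage below maps each disk's
  # iv_name to a (drbd_device, old_lvs, new_lvs) tuple, e.g. roughly
  #   {"disk/0": (drbd_disk, [old_data_lv, old_meta_lv],
  #               [new_data_lv, new_meta_lv])}
  # where the values stand in for objects.Disk instances.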
  def _CheckDevices(self, node_name, iv_names):
9757
    for name, (dev, _, _) in iv_names.iteritems():
9758
      self.cfg.SetDiskID(dev, node_name)
9759

    
9760
      result = self.rpc.call_blockdev_find(node_name, dev)
9761

    
9762
      msg = result.fail_msg
9763
      if msg or not result.payload:
9764
        if not msg:
9765
          msg = "disk not found"
9766
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9767
                                 (name, msg))
9768

    
9769
      if result.payload.is_degraded:
9770
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9771

    
9772
  def _RemoveOldStorage(self, node_name, iv_names):
9773
    for name, (_, old_lvs, _) in iv_names.iteritems():
9774
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9775

    
9776
      for lv in old_lvs:
9777
        self.cfg.SetDiskID(lv, node_name)
9778

    
9779
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9780
        if msg:
9781
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9782
                             hint="remove unused LVs manually")
9783

    
9784
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6
9807

    
9808
    # Step: check device activation
9809
    self.lu.LogStep(1, steps_total, "Check device existence")
9810
    self._CheckDisksExistence([self.other_node, self.target_node])
9811
    self._CheckVolumeGroup([self.target_node, self.other_node])
9812

    
9813
    # Step: check other node consistency
9814
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9815
    self._CheckDisksConsistency(self.other_node,
9816
                                self.other_node == self.instance.primary_node,
9817
                                False)
9818

    
9819
    # Step: create new storage
9820
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9821
    iv_names = self._CreateNewStorage(self.target_node)
9822

    
9823
    # Step: for each lv, detach+rename*2+attach
9824
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9825
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9826
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9827

    
9828
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9829
                                                     old_lvs)
9830
      result.Raise("Can't detach drbd from local storage on node"
9831
                   " %s for device %s" % (self.target_node, dev.iv_name))
9832
      #dev.children = []
9833
      #cfg.Update(instance)
9834

    
9835
      # ok, we created the new LVs, so now we know we have the needed
9836
      # storage; as such, we proceed on the target node to rename
9837
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9838
      # using the assumption that logical_id == physical_id (which in
9839
      # turn is the unique_id on that node)
9840

    
9841
      # FIXME(iustin): use a better name for the replaced LVs
9842
      temp_suffix = int(time.time())
9843
      ren_fn = lambda d, suff: (d.physical_id[0],
9844
                                d.physical_id[1] + "_replaced-%s" % suff)
9845

    
9846
      # Build the rename list based on what LVs exist on the node
9847
      rename_old_to_new = []
9848
      for to_ren in old_lvs:
9849
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9850
        if not result.fail_msg and result.payload:
9851
          # device exists
9852
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9853

    
9854
      self.lu.LogInfo("Renaming the old LVs on the target node")
9855
      result = self.rpc.call_blockdev_rename(self.target_node,
9856
                                             rename_old_to_new)
9857
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9858

    
9859
      # Now we rename the new LVs to the old LVs
9860
      self.lu.LogInfo("Renaming the new LVs on the target node")
9861
      rename_new_to_old = [(new, old.physical_id)
9862
                           for old, new in zip(old_lvs, new_lvs)]
9863
      result = self.rpc.call_blockdev_rename(self.target_node,
9864
                                             rename_new_to_old)
9865
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9866

    
9867
      # Intermediate steps of in memory modifications
9868
      for old, new in zip(old_lvs, new_lvs):
9869
        new.logical_id = old.logical_id
9870
        self.cfg.SetDiskID(new, self.target_node)
9871

    
9872
      # We need to modify old_lvs so that removal later removes the
9873
      # right LVs, not the newly added ones; note that old_lvs is a
9874
      # copy here
9875
      for disk in old_lvs:
9876
        disk.logical_id = ren_fn(disk, temp_suffix)
9877
        self.cfg.SetDiskID(disk, self.target_node)
9878

    
9879
      # Now that the new lvs have the old name, we can add them to the device
9880
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9881
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9882
                                                  new_lvs)
9883
      msg = result.fail_msg
9884
      if msg:
9885
        for new_lv in new_lvs:
9886
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9887
                                               new_lv).fail_msg
9888
          if msg2:
9889
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
9892
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9893

    
9894
    cstep = 5
9895
    if self.early_release:
9896
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9897
      cstep += 1
9898
      self._RemoveOldStorage(self.target_node, iv_names)
9899
      # WARNING: we release both node locks here, do not do other RPCs
9900
      # than WaitForSync to the primary node
9901
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9902
                    names=[self.target_node, self.other_node])
9903

    
9904
    # Wait for sync
9905
    # This can fail as the old devices are degraded and _WaitForSync
9906
    # does a combined result over all disks, so we don't check its return value
9907
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9908
    cstep += 1
9909
    _WaitForSync(self.lu, self.instance)
9910

    
9911
    # Check all devices manually
9912
    self._CheckDevices(self.instance.primary_node, iv_names)
9913

    
9914
    # Step: remove old storage
9915
    if not self.early_release:
9916
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9917
      cstep += 1
9918
      self._RemoveOldStorage(self.target_node, iv_names)
9919

    
9920
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6
9940

    
9941
    pnode = self.instance.primary_node
9942

    
9943
    # Step: check device activation
9944
    self.lu.LogStep(1, steps_total, "Check device existence")
9945
    self._CheckDisksExistence([self.instance.primary_node])
9946
    self._CheckVolumeGroup([self.instance.primary_node])
9947

    
9948
    # Step: check other node consistency
9949
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9950
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9951

    
9952
    # Step: create new storage
9953
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9954
    for idx, dev in enumerate(self.instance.disks):
9955
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9956
                      (self.new_node, idx))
9957
      # we pass force_create=True to force LVM creation
9958
      for new_lv in dev.children:
9959
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9960
                        _GetInstanceInfoText(self.instance), False)
9961

    
9962
    # Step 4: drbd minors and drbd setup changes
9963
    # after this, we must manually remove the drbd minors on both the
9964
    # error and the success paths
9965
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9966
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9967
                                         for dev in self.instance.disks],
9968
                                        self.instance.name)
9969
    logging.debug("Allocated minors %r", minors)
9970

    
9971
    iv_names = {}
9972
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9973
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9974
                      (self.new_node, idx))
9975
      # create new devices on new_node; note that we create two IDs:
9976
      # one without port, so the drbd will be activated without
9977
      # networking information on the new node at this stage, and one
9978
      # with network, for the latter activation in step 4
9979
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9980
      if self.instance.primary_node == o_node1:
9981
        p_minor = o_minor1
9982
      else:
9983
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9984
        p_minor = o_minor2
9985

    
9986
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9987
                      p_minor, new_minor, o_secret)
9988
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9989
                    p_minor, new_minor, o_secret)
9990

    
9991
      iv_names[idx] = (dev, dev.children, new_net_id)
9992
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9993
                    new_net_id)
9994
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9995
                              logical_id=new_alone_id,
9996
                              children=dev.children,
9997
                              size=dev.size)
9998
      try:
9999
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10000
                              _GetInstanceInfoText(self.instance), False)
10001
      except errors.GenericError:
10002
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10003
        raise
10004

    
10005
    # We have new devices, shutdown the drbd on the old secondary
10006
    for idx, dev in enumerate(self.instance.disks):
10007
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10008
      self.cfg.SetDiskID(dev, self.target_node)
10009
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10010
      if msg:
10011
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
10013
                           hint=("Please cleanup this device manually as"
10014
                                 " soon as possible"))
10015

    
10016
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10017
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10018
                                               self.instance.disks)[pnode]
10019

    
10020
    msg = result.fail_msg
10021
    if msg:
10022
      # detaches didn't succeed (unlikely)
10023
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10024
      raise errors.OpExecError("Can't detach the disks from the network on"
10025
                               " old node: %s" % (msg,))
10026

    
10027
    # if we managed to detach at least one, we update all the disks of
10028
    # the instance to point to the new secondary
10029
    self.lu.LogInfo("Updating instance configuration")
10030
    for dev, _, new_logical_id in iv_names.itervalues():
10031
      dev.logical_id = new_logical_id
10032
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10033

    
10034
    self.cfg.Update(self.instance, feedback_fn)
10035

    
10036
    # and now perform the drbd attach
10037
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10038
                    " (standalone => connected)")
10039
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10040
                                            self.new_node],
10041
                                           self.node_secondary_ip,
10042
                                           self.instance.disks,
10043
                                           self.instance.name,
10044
                                           False)
10045
    for to_node, to_result in result.items():
10046
      msg = to_result.fail_msg
10047
      if msg:
10048
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10049
                           to_node, msg,
10050
                           hint=("please do a gnt-instance info to see the"
10051
                                 " status of disks"))
10052
    cstep = 5
10053
    if self.early_release:
10054
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10055
      cstep += 1
10056
      self._RemoveOldStorage(self.target_node, iv_names)
10057
      # WARNING: we release all node locks here, do not do other RPCs
10058
      # than WaitForSync to the primary node
10059
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10060
                    names=[self.instance.primary_node,
10061
                           self.target_node,
10062
                           self.new_node])
10063

    
10064
    # Wait for sync
10065
    # This can fail as the old devices are degraded and _WaitForSync
10066
    # does a combined result over all disks, so we don't check its return value
10067
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10068
    cstep += 1
10069
    _WaitForSync(self.lu, self.instance)
10070

    
10071
    # Check all devices manually
10072
    self._CheckDevices(self.instance.primary_node, iv_names)
10073

    
10074
    # Step: remove old storage
10075
    if not self.early_release:
10076
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10077
      self._RemoveOldStorage(self.target_node, iv_names)


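# Illustrative sketch (not part of the original module): how the DRBD8
# logical_id tuple is rewritten when the secondary node is replaced, as done
# in _ExecDrbd8Secondary above.  The tuple layout (node_a, node_b, port,
# minor_a, minor_b, secret) follows the code above; the helper name and any
# example values are made up for illustration only.
def _ExampleNewDrbdLogicalIds(old_logical_id, primary_node, new_node,
                              new_minor):
  """Return (standalone_id, networked_id) for a replaced secondary (sketch).

  This mirrors the computation of new_alone_id and new_net_id above: the
  standalone variant has no port, so the device is brought up without
  networking, while the networked variant keeps the old port and secret.

  """
  (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = old_logical_id
  if primary_node == o_node1:
    p_minor = o_minor1
  else:
    p_minor = o_minor2
  alone_id = (primary_node, new_node, None, p_minor, new_minor, o_secret)
  net_id = (primary_node, new_node, o_port, p_minor, new_minor, o_secret)
  return (alone_id, net_id)

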
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


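# Illustrative sketch (not part of the original module): the CheckArguments
# logic above only allows repairing storage types whose entry in
# constants.VALID_STORAGE_OPERATIONS contains SO_FIX_CONSISTENCY.  The mapping
# and constant values below are stand-ins for illustration, not the real
# Ganeti constants.
_EXAMPLE_SO_FIX_CONSISTENCY = "fix-consistency"
_EXAMPLE_VALID_STORAGE_OPERATIONS = {
  "lvm-vg": frozenset([_EXAMPLE_SO_FIX_CONSISTENCY]),
  "file": frozenset(),
  }

def _ExampleStorageTypeIsRepairable(storage_type):
  """Return True if the (example) storage type supports repair."""
  return (_EXAMPLE_SO_FIX_CONSISTENCY in
          _EXAMPLE_VALID_STORAGE_OPERATIONS.get(storage_type, frozenset()))

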
class LUNodeEvacuate(NoHooksLU):
10143
  """Evacuates instances off a list of nodes.
10144

10145
  """
10146
  REQ_BGL = False
10147

    
10148
  _MODE2IALLOCATOR = {
10149
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10150
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10151
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10152
    }
10153
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10154
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
10155
          constants.IALLOCATOR_NEVAC_MODES)
10156

    
10157
  def CheckArguments(self):
10158
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10159

    
10160
  def ExpandNames(self):
10161
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10162

    
10163
    if self.op.remote_node is not None:
10164
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10165
      assert self.op.remote_node
10166

    
10167
      if self.op.remote_node == self.op.node_name:
10168
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10169
                                   " secondary node", errors.ECODE_INVAL)
10170

    
10171
      if self.op.mode != constants.NODE_EVAC_SEC:
10172
        raise errors.OpPrereqError("Without the use of an iallocator only"
10173
                                   " secondary instances can be evacuated",
10174
                                   errors.ECODE_INVAL)
10175

    
10176
    # Declare locks
10177
    self.share_locks = _ShareAll()
10178
    self.needed_locks = {
10179
      locking.LEVEL_INSTANCE: [],
10180
      locking.LEVEL_NODEGROUP: [],
10181
      locking.LEVEL_NODE: [],
10182
      }
10183

    
10184
    if self.op.remote_node is None:
10185
      # Iallocator will choose any node(s) in the same group
10186
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10187
    else:
10188
      group_nodes = frozenset([self.op.remote_node])
10189

    
10190
    # Determine nodes to be locked
10191
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10192

    
10193
  def _DetermineInstances(self):
10194
    """Builds list of instances to operate on.
10195

10196
    """
10197
    assert self.op.mode in constants.NODE_EVAC_MODES
10198

    
10199
    if self.op.mode == constants.NODE_EVAC_PRI:
10200
      # Primary instances only
10201
      inst_fn = _GetNodePrimaryInstances
10202
      assert self.op.remote_node is None, \
10203
        "Evacuating primary instances requires iallocator"
10204
    elif self.op.mode == constants.NODE_EVAC_SEC:
10205
      # Secondary instances only
10206
      inst_fn = _GetNodeSecondaryInstances
10207
    else:
10208
      # All instances
10209
      assert self.op.mode == constants.NODE_EVAC_ALL
10210
      inst_fn = _GetNodeInstances
10211

    
10212
    return inst_fn(self.cfg, self.op.node_name)
10213

    
10214
  def DeclareLocks(self, level):
10215
    if level == locking.LEVEL_INSTANCE:
10216
      # Lock instances optimistically, needs verification once node and group
10217
      # locks have been acquired
10218
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10219
        set(i.name for i in self._DetermineInstances())
10220

    
10221
    elif level == locking.LEVEL_NODEGROUP:
10222
      # Lock node groups optimistically, needs verification once nodes have
10223
      # been acquired
10224
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10225
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10226

    
10227
    elif level == locking.LEVEL_NODE:
10228
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10229

    
10230
  def CheckPrereq(self):
10231
    # Verify locks
10232
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10233
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10234
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10235

    
10236
    assert owned_nodes == self.lock_nodes
10237

    
10238
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10239
    if owned_groups != wanted_groups:
10240
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10241
                               " current groups are '%s', used to be '%s'" %
10242
                               (utils.CommaJoin(wanted_groups),
10243
                                utils.CommaJoin(owned_groups)))
10244

    
10245
    # Determine affected instances
10246
    self.instances = self._DetermineInstances()
10247
    self.instance_names = [i.name for i in self.instances]
10248

    
10249
    if set(self.instance_names) != owned_instances:
10250
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10251
                               " were acquired, current instances are '%s',"
10252
                               " used to be '%s'" %
10253
                               (self.op.node_name,
10254
                                utils.CommaJoin(self.instance_names),
10255
                                utils.CommaJoin(owned_instances)))
10256

    
10257
    if self.instance_names:
10258
      self.LogInfo("Evacuating instances from node '%s': %s",
10259
                   self.op.node_name,
10260
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10261
    else:
10262
      self.LogInfo("No instances to evacuate from node '%s'",
10263
                   self.op.node_name)
10264

    
10265
    if self.op.remote_node is not None:
10266
      for i in self.instances:
10267
        if i.primary_node == self.op.remote_node:
10268
          raise errors.OpPrereqError("Node %s is the primary node of"
10269
                                     " instance %s, cannot use it as"
10270
                                     " secondary" %
10271
                                     (self.op.remote_node, i.name),
10272
                                     errors.ECODE_INVAL)
10273

    
10274
  def Exec(self, feedback_fn):
10275
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10276

    
10277
    if not self.instance_names:
10278
      # No instances to evacuate
10279
      jobs = []
10280

    
10281
    elif self.op.iallocator is not None:
10282
      # TODO: Implement relocation to other group
10283
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10284
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10285
                       instances=list(self.instance_names))
10286

    
10287
      ial.Run(self.op.iallocator)
10288

    
10289
      if not ial.success:
10290
        raise errors.OpPrereqError("Can't compute node evacuation using"
10291
                                   " iallocator '%s': %s" %
10292
                                   (self.op.iallocator, ial.info),
10293
                                   errors.ECODE_NORES)
10294

    
10295
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10296

    
10297
    elif self.op.remote_node is not None:
10298
      assert self.op.mode == constants.NODE_EVAC_SEC
10299
      jobs = [
10300
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10301
                                        remote_node=self.op.remote_node,
10302
                                        disks=[],
10303
                                        mode=constants.REPLACE_DISK_CHG,
10304
                                        early_release=self.op.early_release)]
10305
        for instance_name in self.instance_names
10306
        ]
10307

    
10308
    else:
10309
      raise errors.ProgrammerError("No iallocator or remote node")
10310

    
10311
    return ResultWithJobs(jobs)


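# Illustrative sketch (not part of the original module): LUNodeEvacuate.Exec
# above returns a ResultWithJobs whose payload is a list of jobs, each job
# being a list of opcodes.  For the fixed-remote-node case it builds one
# single-opcode replace-disks job per evacuated instance; the helper below
# mirrors that shape, with a made-up name.
def _ExampleBuildSecEvacJobs(instance_names, remote_node, early_release):
  """Build one replace-disks job per instance (sketch only)."""
  return [
    [opcodes.OpInstanceReplaceDisks(instance_name=name,
                                    remote_node=remote_node,
                                    disks=[],
                                    mode=constants.REPLACE_DISK_CHG,
                                    early_release=early_release)]
    for name in instance_names
    ]

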
def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]


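# Illustrative data (not part of the original module): the alloc_result
# consumed by _LoadNodeEvacResult above is a (moved, failed, jobs) triple.
# The concrete instance/node/group names and the serialized opcode below are
# made up purely to show the expected shape.
_EXAMPLE_NEVAC_RESULT = (
  # moved: (instance name, target group, target nodes)
  [("instance1.example.com", "default", ["node3.example.com"])],
  # failed: (instance name, failure reason)
  [],
  # jobs: a list of jobs, each a list of serialized opcodes to be loaded
  # via opcodes.OpCode.LoadOpCode
  [[{"OP_ID": "OP_INSTANCE_MIGRATE",
     "instance_name": "instance1.example.com"}]],
  )

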
class LUInstanceGrowDisk(LogicalUnit):
10372
  """Grow a disk of an instance.
10373

10374
  """
10375
  HPATH = "disk-grow"
10376
  HTYPE = constants.HTYPE_INSTANCE
10377
  REQ_BGL = False
10378

    
10379
  def ExpandNames(self):
10380
    self._ExpandAndLockInstance()
10381
    self.needed_locks[locking.LEVEL_NODE] = []
10382
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10383

    
10384
  def DeclareLocks(self, level):
10385
    if level == locking.LEVEL_NODE:
10386
      self._LockInstancesNodes()
10387

    
10388
  def BuildHooksEnv(self):
10389
    """Build hooks env.
10390

10391
    This runs on the master, the primary and all the secondaries.
10392

10393
    """
10394
    env = {
10395
      "DISK": self.op.disk,
10396
      "AMOUNT": self.op.amount,
10397
      }
10398
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10399
    return env
10400

    
10401
  def BuildHooksNodes(self):
10402
    """Build hooks nodes.
10403

10404
    """
10405
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10406
    return (nl, nl)
10407

    
10408
  def CheckPrereq(self):
10409
    """Check prerequisites.
10410

10411
    This checks that the instance is in the cluster.
10412

10413
    """
10414
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10415
    assert instance is not None, \
10416
      "Cannot retrieve locked instance %s" % self.op.instance_name
10417
    nodenames = list(instance.all_nodes)
10418
    for node in nodenames:
10419
      _CheckNodeOnline(self, node)
10420

    
10421
    self.instance = instance
10422

    
10423
    if instance.disk_template not in constants.DTS_GROWABLE:
10424
      raise errors.OpPrereqError("Instance's disk layout does not support"
10425
                                 " growing", errors.ECODE_INVAL)
10426

    
10427
    self.disk = instance.FindDisk(self.op.disk)
10428

    
10429
    if instance.disk_template not in (constants.DT_FILE,
10430
                                      constants.DT_SHARED_FILE):
10431
      # TODO: check the free disk space for file, when that feature will be
10432
      # supported
10433
      _CheckNodesFreeDiskPerVG(self, nodenames,
10434
                               self.disk.ComputeGrowth(self.op.amount))
10435

    
10436
  def Exec(self, feedback_fn):
10437
    """Execute disk grow.
10438

10439
    """
10440
    instance = self.instance
10441
    disk = self.disk
10442

    
10443
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10444
    if not disks_ok:
10445
      raise errors.OpExecError("Cannot activate block device to grow")
10446

    
10447
    # First run all grow ops in dry-run mode
10448
    for node in instance.all_nodes:
10449
      self.cfg.SetDiskID(disk, node)
10450
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10451
      result.Raise("Grow request failed to node %s" % node)
10452

    
10453
    # We know that (as far as we can test) operations across different
10454
    # nodes will succeed, time to run it for real
10455
    for node in instance.all_nodes:
10456
      self.cfg.SetDiskID(disk, node)
10457
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10458
      result.Raise("Grow request failed to node %s" % node)
10459

    
10460
      # TODO: Rewrite code to work properly
10461
      # DRBD goes into sync mode for a short amount of time after executing the
10462
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10463
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10464
      # time is a work-around.
10465
      time.sleep(5)
10466

    
10467
    disk.RecordGrow(self.op.amount)
10468
    self.cfg.Update(instance, feedback_fn)
10469
    if self.op.wait_for_sync:
10470
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10471
      if disk_abort:
10472
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10473
                             " status; please check the instance")
10474
      if not instance.admin_up:
10475
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10476
    elif not instance.admin_up:
10477
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10478
                           " not supposed to be running because no wait for"
10479
                           " sync mode was requested")


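# Illustrative sketch (not part of the original module): LUInstanceGrowDisk
# above grows a disk in two passes, first with dryrun=True on every node and
# only then for real, so a node that would fail the grow is detected before
# any node is actually resized.  The helper below mirrors that pattern;
# grow_fn stands in for rpc.call_blockdev_grow and is an assumption.
def _ExampleTwoPassGrow(nodes, grow_fn):
  """Run a dry-run pass on all nodes before committing (sketch only)."""
  for dry_run in (True, False):
    for node in nodes:
      grow_fn(node, dry_run)

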
class LUInstanceQueryData(NoHooksLU):
10483
  """Query runtime instance data.
10484

10485
  """
10486
  REQ_BGL = False
10487

    
10488
  def ExpandNames(self):
10489
    self.needed_locks = {}
10490

    
10491
    # Use locking if requested or when non-static information is wanted
10492
    if not (self.op.static or self.op.use_locking):
10493
      self.LogWarning("Non-static data requested, locks need to be acquired")
10494
      self.op.use_locking = True
10495

    
10496
    if self.op.instances or not self.op.use_locking:
10497
      # Expand instance names right here
10498
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10499
    else:
10500
      # Will use acquired locks
10501
      self.wanted_names = None
10502

    
10503
    if self.op.use_locking:
10504
      self.share_locks = _ShareAll()
10505

    
10506
      if self.wanted_names is None:
10507
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10508
      else:
10509
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10510

    
10511
      self.needed_locks[locking.LEVEL_NODE] = []
10512
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10513

    
10514
  def DeclareLocks(self, level):
10515
    if self.op.use_locking and level == locking.LEVEL_NODE:
10516
      self._LockInstancesNodes()
10517

    
10518
  def CheckPrereq(self):
10519
    """Check prerequisites.
10520

10521
    This only checks the optional instance list against the existing names.
10522

10523
    """
10524
    if self.wanted_names is None:
10525
      assert self.op.use_locking, "Locking was not used"
10526
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10527

    
10528
    self.wanted_instances = \
10529
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10530

    
10531
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10532
    """Returns the status of a block device
10533

10534
    """
10535
    if self.op.static or not node:
10536
      return None
10537

    
10538
    self.cfg.SetDiskID(dev, node)
10539

    
10540
    result = self.rpc.call_blockdev_find(node, dev)
10541
    if result.offline:
10542
      return None
10543

    
10544
    result.Raise("Can't compute disk status for %s" % instance_name)
10545

    
10546
    status = result.payload
10547
    if status is None:
10548
      return None
10549

    
10550
    return (status.dev_path, status.major, status.minor,
10551
            status.sync_percent, status.estimated_time,
10552
            status.is_degraded, status.ldisk_status)
10553

    
10554
  def _ComputeDiskStatus(self, instance, snode, dev):
10555
    """Compute block device status.
10556

10557
    """
10558
    if dev.dev_type in constants.LDS_DRBD:
10559
      # we change the snode then (otherwise we use the one passed in)
10560
      if dev.logical_id[0] == instance.primary_node:
10561
        snode = dev.logical_id[1]
10562
      else:
10563
        snode = dev.logical_id[0]
10564

    
10565
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10566
                                              instance.name, dev)
10567
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10568

    
10569
    if dev.children:
10570
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10571
                                        instance, snode),
10572
                         dev.children)
10573
    else:
10574
      dev_children = []
10575

    
10576
    return {
10577
      "iv_name": dev.iv_name,
10578
      "dev_type": dev.dev_type,
10579
      "logical_id": dev.logical_id,
10580
      "physical_id": dev.physical_id,
10581
      "pstatus": dev_pstatus,
10582
      "sstatus": dev_sstatus,
10583
      "children": dev_children,
10584
      "mode": dev.mode,
10585
      "size": dev.size,
10586
      }
10587

    
10588
  def Exec(self, feedback_fn):
10589
    """Gather and return data"""
10590
    result = {}
10591

    
10592
    cluster = self.cfg.GetClusterInfo()
10593

    
10594
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10595
                                          for i in self.wanted_instances)
10596
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10597
      if self.op.static or pnode.offline:
10598
        remote_state = None
10599
        if pnode.offline:
10600
          self.LogWarning("Primary node %s is marked offline, returning static"
10601
                          " information only for instance %s" %
10602
                          (pnode.name, instance.name))
10603
      else:
10604
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10605
                                                  instance.name,
10606
                                                  instance.hypervisor)
10607
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10608
        remote_info = remote_info.payload
10609
        if remote_info and "state" in remote_info:
10610
          remote_state = "up"
10611
        else:
10612
          remote_state = "down"
10613

    
10614
      if instance.admin_up:
10615
        config_state = "up"
10616
      else:
10617
        config_state = "down"
10618

    
10619
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10620
                  instance.disks)
10621

    
10622
      result[instance.name] = {
10623
        "name": instance.name,
10624
        "config_state": config_state,
10625
        "run_state": remote_state,
10626
        "pnode": instance.primary_node,
10627
        "snodes": instance.secondary_nodes,
10628
        "os": instance.os,
10629
        # this happens to be the same format used for hooks
10630
        "nics": _NICListToTuple(self, instance.nics),
10631
        "disk_template": instance.disk_template,
10632
        "disks": disks,
10633
        "hypervisor": instance.hypervisor,
10634
        "network_port": instance.network_port,
10635
        "hv_instance": instance.hvparams,
10636
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10637
        "be_instance": instance.beparams,
10638
        "be_actual": cluster.FillBE(instance),
10639
        "os_instance": instance.osparams,
10640
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10641
        "serial_no": instance.serial_no,
10642
        "mtime": instance.mtime,
10643
        "ctime": instance.ctime,
10644
        "uuid": instance.uuid,
10645
        }
10646

    
10647
    return result


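# Illustrative sketch (not part of the original module): _ComputeDiskStatus in
# LUInstanceQueryData above descends recursively into dev.children, so DRBD
# disks come back as a small tree of dicts.  The walker below shows one way
# such a result could be flattened; it only relies on the "iv_name",
# "dev_type" and "children" keys built above, and its name is made up.
def _ExampleWalkDiskTree(disk_status, _level=0):
  """Yield (nesting level, dev_type, iv_name) for a disk status tree."""
  yield (_level, disk_status["dev_type"], disk_status["iv_name"])
  for child in disk_status["children"]:
    for item in _ExampleWalkDiskTree(child, _level + 1):
      yield item

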
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.
10652

10653
  """
10654
  HPATH = "instance-modify"
10655
  HTYPE = constants.HTYPE_INSTANCE
10656
  REQ_BGL = False
10657

    
10658
  def CheckArguments(self):
10659
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10660
            self.op.hvparams or self.op.beparams or self.op.os_name):
10661
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10662

    
10663
    if self.op.hvparams:
10664
      _CheckGlobalHvParams(self.op.hvparams)
10665

    
10666
    # Disk validation
10667
    disk_addremove = 0
10668
    for disk_op, disk_dict in self.op.disks:
10669
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10670
      if disk_op == constants.DDM_REMOVE:
10671
        disk_addremove += 1
10672
        continue
10673
      elif disk_op == constants.DDM_ADD:
10674
        disk_addremove += 1
10675
      else:
10676
        if not isinstance(disk_op, int):
10677
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10678
        if not isinstance(disk_dict, dict):
10679
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10680
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10681

    
10682
      if disk_op == constants.DDM_ADD:
10683
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10684
        if mode not in constants.DISK_ACCESS_SET:
10685
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10686
                                     errors.ECODE_INVAL)
10687
        size = disk_dict.get(constants.IDISK_SIZE, None)
10688
        if size is None:
10689
          raise errors.OpPrereqError("Required disk parameter size missing",
10690
                                     errors.ECODE_INVAL)
10691
        try:
10692
          size = int(size)
10693
        except (TypeError, ValueError), err:
10694
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10695
                                     str(err), errors.ECODE_INVAL)
10696
        disk_dict[constants.IDISK_SIZE] = size
10697
      else:
10698
        # modification of disk
10699
        if constants.IDISK_SIZE in disk_dict:
10700
          raise errors.OpPrereqError("Disk size change not possible, use"
10701
                                     " grow-disk", errors.ECODE_INVAL)
10702

    
10703
    if disk_addremove > 1:
10704
      raise errors.OpPrereqError("Only one disk add or remove operation"
10705
                                 " supported at a time", errors.ECODE_INVAL)
10706

    
10707
    if self.op.disks and self.op.disk_template is not None:
10708
      raise errors.OpPrereqError("Disk template conversion and other disk"
10709
                                 " changes not supported at the same time",
10710
                                 errors.ECODE_INVAL)
10711

    
10712
    if (self.op.disk_template and
10713
        self.op.disk_template in constants.DTS_INT_MIRROR and
10714
        self.op.remote_node is None):
10715
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10716
                                 " one requires specifying a secondary node",
10717
                                 errors.ECODE_INVAL)
10718

    
10719
    # NIC validation
10720
    nic_addremove = 0
10721
    for nic_op, nic_dict in self.op.nics:
10722
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10723
      if nic_op == constants.DDM_REMOVE:
10724
        nic_addremove += 1
10725
        continue
10726
      elif nic_op == constants.DDM_ADD:
10727
        nic_addremove += 1
10728
      else:
10729
        if not isinstance(nic_op, int):
10730
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10731
        if not isinstance(nic_dict, dict):
10732
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10733
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10734

    
10735
      # nic_dict should be a dict
10736
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10737
      if nic_ip is not None:
10738
        if nic_ip.lower() == constants.VALUE_NONE:
10739
          nic_dict[constants.INIC_IP] = None
10740
        else:
10741
          if not netutils.IPAddress.IsValid(nic_ip):
10742
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10743
                                       errors.ECODE_INVAL)
10744

    
10745
      nic_bridge = nic_dict.get("bridge", None)
10746
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10747
      if nic_bridge and nic_link:
10748
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10749
                                   " at the same time", errors.ECODE_INVAL)
10750
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10751
        nic_dict["bridge"] = None
10752
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10753
        nic_dict[constants.INIC_LINK] = None
10754

    
10755
      if nic_op == constants.DDM_ADD:
10756
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10757
        if nic_mac is None:
10758
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10759

    
10760
      if constants.INIC_MAC in nic_dict:
10761
        nic_mac = nic_dict[constants.INIC_MAC]
10762
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10763
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10764

    
10765
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10766
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10767
                                     " modifying an existing nic",
10768
                                     errors.ECODE_INVAL)
10769

    
10770
    if nic_addremove > 1:
10771
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10772
                                 " supported at a time", errors.ECODE_INVAL)
10773

    
10774
  def ExpandNames(self):
10775
    self._ExpandAndLockInstance()
10776
    self.needed_locks[locking.LEVEL_NODE] = []
10777
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10778

    
10779
  def DeclareLocks(self, level):
10780
    if level == locking.LEVEL_NODE:
10781
      self._LockInstancesNodes()
10782
      if self.op.disk_template and self.op.remote_node:
10783
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10784
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10785

    
10786
  def BuildHooksEnv(self):
10787
    """Build hooks env.
10788

10789
    This runs on the master, primary and secondaries.
10790

10791
    """
10792
    args = dict()
10793
    if constants.BE_MEMORY in self.be_new:
10794
      args["memory"] = self.be_new[constants.BE_MEMORY]
10795
    if constants.BE_VCPUS in self.be_new:
10796
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10797
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10798
    # information at all.
10799
    if self.op.nics:
10800
      args["nics"] = []
10801
      nic_override = dict(self.op.nics)
10802
      for idx, nic in enumerate(self.instance.nics):
10803
        if idx in nic_override:
10804
          this_nic_override = nic_override[idx]
10805
        else:
10806
          this_nic_override = {}
10807
        if constants.INIC_IP in this_nic_override:
10808
          ip = this_nic_override[constants.INIC_IP]
10809
        else:
10810
          ip = nic.ip
10811
        if constants.INIC_MAC in this_nic_override:
10812
          mac = this_nic_override[constants.INIC_MAC]
10813
        else:
10814
          mac = nic.mac
10815
        if idx in self.nic_pnew:
10816
          nicparams = self.nic_pnew[idx]
10817
        else:
10818
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10819
        mode = nicparams[constants.NIC_MODE]
10820
        link = nicparams[constants.NIC_LINK]
10821
        args["nics"].append((ip, mac, mode, link))
10822
      if constants.DDM_ADD in nic_override:
10823
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10824
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10825
        nicparams = self.nic_pnew[constants.DDM_ADD]
10826
        mode = nicparams[constants.NIC_MODE]
10827
        link = nicparams[constants.NIC_LINK]
10828
        args["nics"].append((ip, mac, mode, link))
10829
      elif constants.DDM_REMOVE in nic_override:
10830
        del args["nics"][-1]
10831

    
10832
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10833
    if self.op.disk_template:
10834
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10835

    
10836
    return env
10837

    
10838
  def BuildHooksNodes(self):
10839
    """Build hooks nodes.
10840

10841
    """
10842
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10843
    return (nl, nl)
10844

    
10845
  def CheckPrereq(self):
10846
    """Check prerequisites.
10847

10848
    This only checks the instance list against the existing names.
10849

10850
    """
10851
    # checking the new params on the primary/secondary nodes
10852

    
10853
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10854
    cluster = self.cluster = self.cfg.GetClusterInfo()
10855
    assert self.instance is not None, \
10856
      "Cannot retrieve locked instance %s" % self.op.instance_name
10857
    pnode = instance.primary_node
10858
    nodelist = list(instance.all_nodes)
10859

    
10860
    # OS change
10861
    if self.op.os_name and not self.op.force:
10862
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10863
                      self.op.force_variant)
10864
      instance_os = self.op.os_name
10865
    else:
10866
      instance_os = instance.os
10867

    
10868
    if self.op.disk_template:
10869
      if instance.disk_template == self.op.disk_template:
10870
        raise errors.OpPrereqError("Instance already has disk template %s" %
10871
                                   instance.disk_template, errors.ECODE_INVAL)
10872

    
10873
      if (instance.disk_template,
10874
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10875
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10876
                                   " %s to %s" % (instance.disk_template,
10877
                                                  self.op.disk_template),
10878
                                   errors.ECODE_INVAL)
10879
      _CheckInstanceDown(self, instance, "cannot change disk template")
10880
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10881
        if self.op.remote_node == pnode:
10882
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10883
                                     " as the primary node of the instance" %
10884
                                     self.op.remote_node, errors.ECODE_STATE)
10885
        _CheckNodeOnline(self, self.op.remote_node)
10886
        _CheckNodeNotDrained(self, self.op.remote_node)
10887
        # FIXME: here we assume that the old instance type is DT_PLAIN
10888
        assert instance.disk_template == constants.DT_PLAIN
10889
        disks = [{constants.IDISK_SIZE: d.size,
10890
                  constants.IDISK_VG: d.logical_id[0]}
10891
                 for d in instance.disks]
10892
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10893
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10894

    
10895
    # hvparams processing
10896
    if self.op.hvparams:
10897
      hv_type = instance.hypervisor
10898
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10899
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10900
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10901

    
10902
      # local check
10903
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10904
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10905
      self.hv_new = hv_new # the new actual values
10906
      self.hv_inst = i_hvdict # the new dict (without defaults)
10907
    else:
10908
      self.hv_new = self.hv_inst = {}
10909

    
10910
    # beparams processing
10911
    if self.op.beparams:
10912
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10913
                                   use_none=True)
10914
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10915
      be_new = cluster.SimpleFillBE(i_bedict)
10916
      self.be_new = be_new # the new actual values
10917
      self.be_inst = i_bedict # the new dict (without defaults)
10918
    else:
10919
      self.be_new = self.be_inst = {}
10920
    be_old = cluster.FillBE(instance)
10921

    
10922
    # osparams processing
10923
    if self.op.osparams:
10924
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10925
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10926
      self.os_inst = i_osdict # the new dict (without defaults)
10927
    else:
10928
      self.os_inst = {}
10929

    
10930
    self.warn = []
10931

    
10932
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10933
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10934
      mem_check_list = [pnode]
10935
      if be_new[constants.BE_AUTO_BALANCE]:
10936
        # either we changed auto_balance to yes or it was from before
10937
        mem_check_list.extend(instance.secondary_nodes)
10938
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10939
                                                  instance.hypervisor)
10940
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10941
                                         instance.hypervisor)
10942
      pninfo = nodeinfo[pnode]
10943
      msg = pninfo.fail_msg
10944
      if msg:
10945
        # Assume the primary node is unreachable and go ahead
10946
        self.warn.append("Can't get info from primary node %s: %s" %
10947
                         (pnode, msg))
10948
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
10949
        self.warn.append("Node data from primary node %s doesn't contain"
10950
                         " free memory information" % pnode)
10951
      elif instance_info.fail_msg:
10952
        self.warn.append("Can't get instance runtime information: %s" %
10953
                        instance_info.fail_msg)
10954
      else:
10955
        if instance_info.payload:
10956
          current_mem = int(instance_info.payload["memory"])
10957
        else:
10958
          # Assume instance not running
10959
          # (there is a slight race condition here, but it's not very probable,
10960
          # and we have no other way to check)
10961
          current_mem = 0
10962
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10963
                    pninfo.payload["memory_free"])
10964
        if miss_mem > 0:
10965
          raise errors.OpPrereqError("This change will prevent the instance"
10966
                                     " from starting, due to %d MB of memory"
10967
                                     " missing on its primary node" % miss_mem,
10968
                                     errors.ECODE_NORES)
10969

    
10970
      if be_new[constants.BE_AUTO_BALANCE]:
10971
        for node, nres in nodeinfo.items():
10972
          if node not in instance.secondary_nodes:
10973
            continue
10974
          nres.Raise("Can't get info from secondary node %s" % node,
10975
                     prereq=True, ecode=errors.ECODE_STATE)
10976
          if not isinstance(nres.payload.get("memory_free", None), int):
10977
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10978
                                       " memory information" % node,
10979
                                       errors.ECODE_STATE)
10980
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failing over to its secondary"
                                       " node %s, due to not enough memory" %
                                       node, errors.ECODE_STATE)

    # NIC processing
10987
    self.nic_pnew = {}
10988
    self.nic_pinst = {}
10989
    for nic_op, nic_dict in self.op.nics:
10990
      if nic_op == constants.DDM_REMOVE:
10991
        if not instance.nics:
10992
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10993
                                     errors.ECODE_INVAL)
10994
        continue
10995
      if nic_op != constants.DDM_ADD:
10996
        # an existing nic
10997
        if not instance.nics:
10998
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10999
                                     " no NICs" % nic_op,
11000
                                     errors.ECODE_INVAL)
11001
        if nic_op < 0 or nic_op >= len(instance.nics):
11002
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11003
                                     " are 0 to %d" %
11004
                                     (nic_op, len(instance.nics) - 1),
11005
                                     errors.ECODE_INVAL)
11006
        old_nic_params = instance.nics[nic_op].nicparams
11007
        old_nic_ip = instance.nics[nic_op].ip
11008
      else:
11009
        old_nic_params = {}
11010
        old_nic_ip = None
11011

    
11012
      update_params_dict = dict([(key, nic_dict[key])
11013
                                 for key in constants.NICS_PARAMETERS
11014
                                 if key in nic_dict])
11015

    
11016
      if "bridge" in nic_dict:
11017
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11018

    
11019
      new_nic_params = _GetUpdatedParams(old_nic_params,
11020
                                         update_params_dict)
11021
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11022
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11023
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11024
      self.nic_pinst[nic_op] = new_nic_params
11025
      self.nic_pnew[nic_op] = new_filled_nic_params
11026
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11027

    
11028
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11029
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11030
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11031
        if msg:
11032
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11033
          if self.op.force:
11034
            self.warn.append(msg)
11035
          else:
11036
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11037
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11038
        if constants.INIC_IP in nic_dict:
11039
          nic_ip = nic_dict[constants.INIC_IP]
11040
        else:
11041
          nic_ip = old_nic_ip
11042
        if nic_ip is None:
11043
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11044
                                     " on a routed nic", errors.ECODE_INVAL)
11045
      if constants.INIC_MAC in nic_dict:
11046
        nic_mac = nic_dict[constants.INIC_MAC]
11047
        if nic_mac is None:
11048
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11049
                                     errors.ECODE_INVAL)
11050
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11051
          # otherwise generate the mac
11052
          nic_dict[constants.INIC_MAC] = \
11053
            self.cfg.GenerateMAC(self.proc.GetECId())
11054
        else:
11055
          # or validate/reserve the current one
11056
          try:
11057
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11058
          except errors.ReservationError:
11059
            raise errors.OpPrereqError("MAC address %s already in use"
11060
                                       " in cluster" % nic_mac,
11061
                                       errors.ECODE_NOTUNIQUE)
11062

    
11063
    # DISK processing
11064
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11065
      raise errors.OpPrereqError("Disk operations not supported for"
11066
                                 " diskless instances",
11067
                                 errors.ECODE_INVAL)
11068
    for disk_op, _ in self.op.disks:
11069
      if disk_op == constants.DDM_REMOVE:
11070
        if len(instance.disks) == 1:
11071
          raise errors.OpPrereqError("Cannot remove the last disk of"
11072
                                     " an instance", errors.ECODE_INVAL)
11073
        _CheckInstanceDown(self, instance, "cannot remove disks")
11074

    
11075
      if (disk_op == constants.DDM_ADD and
11076
          len(instance.disks) >= constants.MAX_DISKS):
11077
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11078
                                   " add more" % constants.MAX_DISKS,
11079
                                   errors.ECODE_STATE)
11080
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)

    return
11089

    
11090
  def _ConvertPlainToDrbd(self, feedback_fn):
11091
    """Converts an instance from plain to drbd.
11092

11093
    """
11094
    feedback_fn("Converting template to drbd")
11095
    instance = self.instance
11096
    pnode = instance.primary_node
11097
    snode = self.op.remote_node
11098

    
11099
    # create a fake disk info for _GenerateDiskTemplate
11100
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11101
                  constants.IDISK_VG: d.logical_id[0]}
11102
                 for d in instance.disks]
11103
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11104
                                      instance.name, pnode, [snode],
11105
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
11116
    # old ones
11117
    feedback_fn("Renaming original volumes...")
11118
    rename_list = [(o, n.children[0].logical_id)
11119
                   for (o, n) in zip(instance.disks, new_disks)]
11120
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11121
    result.Raise("Failed to rename original LVs")
11122

    
11123
    feedback_fn("Initializing DRBD devices...")
11124
    # all child devices are in place, we can now create the DRBD devices
11125
    for disk in new_disks:
11126
      for node in [pnode, snode]:
11127
        f_create = node == pnode
11128
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11129

    
11130
    # at this point, the instance has been modified
11131
    instance.disk_template = constants.DT_DRBD8
11132
    instance.disks = new_disks
11133
    self.cfg.Update(instance, feedback_fn)
11134

    
11135
    # disks are created, waiting for sync
11136
    disk_abort = not _WaitForSync(self, instance,
11137
                                  oneshot=not self.op.wait_for_sync)
11138
    if disk_abort:
11139
      raise errors.OpExecError("There are some degraded disks for"
11140
                               " this instance, please cleanup manually")
11141

    
11142
  def _ConvertDrbdToPlain(self, feedback_fn):
11143
    """Converts an instance from drbd to plain.
11144

11145
    """
11146
    instance = self.instance
11147
    assert len(instance.secondary_nodes) == 1
11148
    pnode = instance.primary_node
11149
    snode = instance.secondary_nodes[0]
11150
    feedback_fn("Converting template to plain")
11151

    
11152
    old_disks = instance.disks
11153
    new_disks = [d.children[0] for d in old_disks]
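    # the first child of a drbd8 disk is its data LV; it is reused directly
    # as the new plain disk; the metadata LV (children[1]) is removed below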
11154

    
11155
    # copy over size and mode
11156
    for parent, child in zip(old_disks, new_disks):
11157
      child.size = parent.size
11158
      child.mode = parent.mode
11159

    
11160
    # update instance structure
11161
    instance.disks = new_disks
11162
    instance.disk_template = constants.DT_PLAIN
11163
    self.cfg.Update(instance, feedback_fn)
11164

    
11165
    feedback_fn("Removing volumes on the secondary node...")
11166
    for disk in old_disks:
11167
      self.cfg.SetDiskID(disk, snode)
11168
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11169
      if msg:
11170
        self.LogWarning("Could not remove block device %s on node %s,"
11171
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11172

    
11173
    feedback_fn("Removing unneeded volumes on the primary node...")
11174
    for idx, disk in enumerate(old_disks):
11175
      meta = disk.children[1]
11176
      self.cfg.SetDiskID(meta, pnode)
11177
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11178
      if msg:
11179
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11180
                        " continuing anyway: %s", idx, pnode, msg)
11181

    
11182
    # this is a DRBD disk, return its port to the pool
11183
    for disk in old_disks:
11184
      tcp_port = disk.logical_id[2]
11185
      self.cfg.AddTcpUdpPort(tcp_port)
11186

    
11187
  def Exec(self, feedback_fn):
11188
    """Modifies an instance.
11189

11190
    All parameters take effect only at the next restart of the instance.
11191

11192
    """
11193
    # Process here the warnings from CheckPrereq, as we don't have a
11194
    # feedback_fn there.
11195
    for warn in self.warn:
11196
      feedback_fn("WARNING: %s" % warn)
11197

    
11198
    result = []
11199
    instance = self.instance
11200
    # disk changes
11201
    for disk_op, disk_dict in self.op.disks:
11202
      if disk_op == constants.DDM_REMOVE:
11203
        # remove the last disk
11204
        device = instance.disks.pop()
11205
        device_idx = len(instance.disks)
11206
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11207
          self.cfg.SetDiskID(disk, node)
11208
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11209
          if msg:
11210
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11211
                            " continuing anyway", device_idx, node, msg)
11212
        result.append(("disk/%d" % device_idx, "remove"))
11213

    
11214
        # if this is a DRBD disk, return its port to the pool
11215
        if device.dev_type in constants.LDS_DRBD:
11216
          tcp_port = device.logical_id[2]
11217
          self.cfg.AddTcpUdpPort(tcp_port)
11218
      elif disk_op == constants.DDM_ADD:
11219
        # add a new disk
11220
        if instance.disk_template in (constants.DT_FILE,
11221
                                        constants.DT_SHARED_FILE):
11222
          file_driver, file_path = instance.disks[0].logical_id
11223
          file_path = os.path.dirname(file_path)
11224
        else:
11225
          file_driver = file_path = None
11226
        disk_idx_base = len(instance.disks)
11227
        new_disk = _GenerateDiskTemplate(self,
11228
                                         instance.disk_template,
11229
                                         instance.name, instance.primary_node,
11230
                                         instance.secondary_nodes,
11231
                                         [disk_dict],
11232
                                         file_path,
11233
                                         file_driver,
11234
                                         disk_idx_base, feedback_fn)[0]
11235
        instance.disks.append(new_disk)
11236
        info = _GetInstanceInfoText(instance)
11237

    
11238
        logging.info("Creating volume %s for instance %s",
11239
                     new_disk.iv_name, instance.name)
11240
        # Note: this needs to be kept in sync with _CreateDisks
11241
        #HARDCODE
11242
        for node in instance.all_nodes:
11243
          f_create = node == instance.primary_node
11244
          try:
11245
            _CreateBlockDev(self, node, instance, new_disk,
11246
                            f_create, info, f_create)
11247
          except errors.OpExecError, err:
11248
            self.LogWarning("Failed to create volume %s (%s) on"
11249
                            " node %s: %s",
11250
                            new_disk.iv_name, new_disk, node, err)
11251
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11252
                       (new_disk.size, new_disk.mode)))
11253
      else:
11254
        # change a given disk
11255
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11256
        result.append(("disk.mode/%d" % disk_op,
11257
                       disk_dict[constants.IDISK_MODE]))
11258

    
11259
    if self.op.disk_template:
11260
      r_shut = _ShutdownInstanceDisks(self, instance)
11261
      if not r_shut:
11262
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11263
                                 " proceed with disk template conversion")
11264
      mode = (instance.disk_template, self.op.disk_template)
11265
      try:
11266
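        # _DISK_CONVERSIONS maps (old, new) disk template pairs to the
        # conversion helpers defined above; they are stored as plain
        # functions, so self has to be passed explicitly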
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11267
      except:
11268
        self.cfg.ReleaseDRBDMinors(instance.name)
11269
        raise
11270
      result.append(("disk_template", self.op.disk_template))
11271

    
11272
    # NIC changes
11273
    for nic_op, nic_dict in self.op.nics:
11274
      if nic_op == constants.DDM_REMOVE:
11275
        # remove the last nic
11276
        del instance.nics[-1]
11277
        result.append(("nic.%d" % len(instance.nics), "remove"))
11278
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set by now
        mac = nic_dict[constants.INIC_MAC]
11281
        ip = nic_dict.get(constants.INIC_IP, None)
11282
        nicparams = self.nic_pinst[constants.DDM_ADD]
11283
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11284
        instance.nics.append(new_nic)
11285
        result.append(("nic.%d" % (len(instance.nics) - 1),
11286
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11287
                       (new_nic.mac, new_nic.ip,
11288
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11289
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11290
                       )))
11291
      else:
11292
        for key in (constants.INIC_MAC, constants.INIC_IP):
11293
          if key in nic_dict:
11294
            setattr(instance.nics[nic_op], key, nic_dict[key])
11295
        if nic_op in self.nic_pinst:
11296
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11297
        for key, val in nic_dict.iteritems():
11298
          result.append(("nic.%s/%d" % (key, nic_op), val))
11299

    
11300
    # hvparams changes
11301
    if self.op.hvparams:
11302
      instance.hvparams = self.hv_inst
11303
      for key, val in self.op.hvparams.iteritems():
11304
        result.append(("hv/%s" % key, val))
11305

    
11306
    # beparams changes
11307
    if self.op.beparams:
11308
      instance.beparams = self.be_inst
11309
      for key, val in self.op.beparams.iteritems():
11310
        result.append(("be/%s" % key, val))
11311

    
11312
    # OS change
11313
    if self.op.os_name:
11314
      instance.os = self.op.os_name
11315

    
11316
    # osparams changes
11317
    if self.op.osparams:
11318
      instance.osparams = self.os_inst
11319
      for key, val in self.op.osparams.iteritems():
11320
        result.append(("os/%s" % key, val))
11321

    
11322
    self.cfg.Update(instance, feedback_fn)
11323

    
11324
    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
11338
    self.share_locks = _ShareAll()
11339
    self.needed_locks = {
11340
      locking.LEVEL_NODEGROUP: [],
11341
      locking.LEVEL_NODE: [],
11342
      }
11343

    
11344
    self._ExpandAndLockInstance()
11345

    
11346
    if self.op.target_groups:
11347
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11348
                                  self.op.target_groups)
11349
    else:
11350
      self.req_target_uuids = None
11351

    
11352
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11353

    
11354
  def DeclareLocks(self, level):
11355
    if level == locking.LEVEL_NODEGROUP:
11356
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11357

    
11358
      if self.req_target_uuids:
11359
        lock_groups = set(self.req_target_uuids)
11360

    
11361
        # Lock all groups used by instance optimistically; this requires going
11362
        # via the node before it's locked, requiring verification later on
11363
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11364
        lock_groups.update(instance_groups)
11365
      else:
11366
        # No target groups, need to lock all of them
11367
        lock_groups = locking.ALL_SET
11368

    
11369
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11370

    
11371
    elif level == locking.LEVEL_NODE:
11372
      if self.req_target_uuids:
11373
        # Lock all nodes used by instances
11374
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11375
        self._LockInstancesNodes()
11376

    
11377
        # Lock all nodes in all potential target groups
11378
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11379
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11380
        member_nodes = [node_name
11381
                        for group in lock_groups
11382
                        for node_name in self.cfg.GetNodeGroup(group).members]
11383
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11384
      else:
11385
        # Lock all nodes as all groups are potential targets
11386
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11387

    
11388
  def CheckPrereq(self):
11389
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11390
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11391
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11392

    
11393
    assert (self.req_target_uuids is None or
11394
            owned_groups.issuperset(self.req_target_uuids))
11395
    assert owned_instances == set([self.op.instance_name])
11396

    
11397
    # Get instance information
11398
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11399

    
11400
    # Check if node groups for locked instance are still correct
11401
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11402
      ("Instance %s's nodes changed while we kept the lock" %
11403
       self.op.instance_name)
11404

    
11405
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11406
                                           owned_groups)
11407

    
11408
    if self.req_target_uuids:
11409
      # User requested specific target groups
11410
      self.target_uuids = self.req_target_uuids
11411
    else:
11412
      # All groups except those used by the instance are potential targets
11413
      self.target_uuids = owned_groups - inst_groups
11414

    
11415
    conflicting_groups = self.target_uuids & inst_groups
11416
    if conflicting_groups:
11417
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11418
                                 " used by the instance '%s'" %
11419
                                 (utils.CommaJoin(conflicting_groups),
11420
                                  self.op.instance_name),
11421
                                 errors.ECODE_INVAL)
11422

    
11423
    if not self.target_uuids:
11424
      raise errors.OpPrereqError("There are no possible target groups",
11425
                                 errors.ECODE_INVAL)
11426

    
11427
  def BuildHooksEnv(self):
11428
    """Build hooks env.
11429

11430
    """
11431
    assert self.target_uuids
11432

    
11433
    env = {
11434
      "TARGET_GROUPS": " ".join(self.target_uuids),
11435
      }
11436

    
11437
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11438

    
11439
    return env
11440

    
11441
  def BuildHooksNodes(self):
11442
    """Build hooks nodes.
11443

11444
    """
11445
    mn = self.cfg.GetMasterNode()
11446
    return ([mn], [mn])
11447

    
11448
  def Exec(self, feedback_fn):
11449
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11450

    
11451
    assert instances == [self.op.instance_name], "Instance not locked"
11452

    
11453
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11454
                     instances=instances, target_groups=list(self.target_uuids))
11455

    
11456
    ial.Run(self.op.iallocator)
11457

    
11458
    if not ial.success:
11459
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11460
                                 " instance '%s' using iallocator '%s': %s" %
11461
                                 (self.op.instance_name, self.op.iallocator,
11462
                                  ial.info),
11463
                                 errors.ECODE_NORES)
11464

    
11465
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11466

    
11467
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11468
                 " instance '%s'", len(jobs), self.op.instance_name)
11469

    
11470
    return ResultWithJobs(jobs)


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
11480
    self.needed_locks = {}
11481
    self.share_locks[locking.LEVEL_NODE] = 1
11482
    if not self.op.nodes:
11483
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11484
    else:
11485
      self.needed_locks[locking.LEVEL_NODE] = \
11486
        _GetWantedNodes(self, self.op.nodes)
11487

    
11488
  def Exec(self, feedback_fn):
11489
    """Compute the list of all the exported system images.
11490

11491
    @rtype: dict
11492
    @return: a dictionary with the structure node->(export-list)
11493
        where export-list is a list of the instances exported on
11494
        that node.
11495

11496
    """
11497
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11498
    rpcresult = self.rpc.call_export_list(self.nodes)
11499
    result = {}
11500
    for node in rpcresult:
11501
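      # nodes that could not be queried are reported as False instead of
      # an export list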
      if rpcresult[node].fail_msg:
11502
        result[node] = False
11503
      else:
11504
        result[node] = rpcresult[node].payload
11505

    
11506
    return result


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
11519
    """Check prerequisites.
11520

11521
    """
11522
    instance_name = self.op.instance_name
11523

    
11524
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11525
    assert self.instance is not None, \
11526
          "Cannot retrieve locked instance %s" % self.op.instance_name
11527
    _CheckNodeOnline(self, self.instance.primary_node)
11528

    
11529
    self._cds = _GetClusterDomainSecret()
11530

    
11531
  def Exec(self, feedback_fn):
11532
    """Prepares an instance for an export.
11533

11534
    """
11535
    instance = self.instance
11536

    
11537
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11538
      salt = utils.GenerateSecret(8)
11539

    
11540
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11541
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11542
                                              constants.RIE_CERT_VALIDITY)
11543
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11544

    
11545
      (name, cert_pem) = result.payload
11546

    
11547
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11548
                                             cert_pem)
11549

    
11550
      return {
11551
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11552
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11553
                          salt),
11554
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11555
        }
11556

    
11557
    return None


class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
11569
    """Check the arguments.
11570

11571
    """
11572
    self.x509_key_name = self.op.x509_key_name
11573
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11574

    
11575
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11576
      if not self.x509_key_name:
11577
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11578
                                   errors.ECODE_INVAL)
11579

    
11580
      if not self.dest_x509_ca_pem:
11581
        raise errors.OpPrereqError("Missing destination X509 CA",
11582
                                   errors.ECODE_INVAL)
11583

    
11584
  def ExpandNames(self):
11585
    self._ExpandAndLockInstance()
11586

    
11587
    # Lock all nodes for local exports
11588
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11589
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the new
      #    one, then remove the old one afterwards
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11598

    
11599
  def DeclareLocks(self, level):
11600
    """Last minute lock declaration."""
11601
    # All nodes are locked anyway, so nothing to do here.
11602

    
11603
  def BuildHooksEnv(self):
11604
    """Build hooks env.
11605

11606
    This will run on the master, primary node and target node.
11607

11608
    """
11609
    env = {
11610
      "EXPORT_MODE": self.op.mode,
11611
      "EXPORT_NODE": self.op.target_node,
11612
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11613
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11614
      # TODO: Generic function for boolean env variables
11615
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11616
      }
11617

    
11618
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11619

    
11620
    return env
11621

    
11622
  def BuildHooksNodes(self):
11623
    """Build hooks nodes.
11624

11625
    """
11626
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11627

    
11628
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11629
      nl.append(self.op.target_node)
11630

    
11631
    return (nl, nl)
11632

    
11633
  def CheckPrereq(self):
11634
    """Check prerequisites.
11635

11636
    This checks that the instance and node names are valid.
11637

11638
    """
11639
    instance_name = self.op.instance_name
11640

    
11641
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11642
    assert self.instance is not None, \
11643
          "Cannot retrieve locked instance %s" % self.op.instance_name
11644
    _CheckNodeOnline(self, self.instance.primary_node)
11645

    
11646
    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
                                 " down first")

    
11651
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11652
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11653
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11654
      assert self.dst_node is not None
11655

    
11656
      _CheckNodeOnline(self, self.dst_node.name)
11657
      _CheckNodeNotDrained(self, self.dst_node.name)
11658

    
11659
      self._cds = None
11660
      self.dest_disk_info = None
11661
      self.dest_x509_ca = None
11662

    
11663
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11664
      self.dst_node = None
11665

    
11666
      if len(self.op.target_node) != len(self.instance.disks):
11667
        raise errors.OpPrereqError(("Received destination information for %s"
11668
                                    " disks, but instance %s has %s disks") %
11669
                                   (len(self.op.target_node), instance_name,
11670
                                    len(self.instance.disks)),
11671
                                   errors.ECODE_INVAL)
11672

    
11673
      cds = _GetClusterDomainSecret()
11674

    
11675
      # Check X509 key name
11676
      try:
11677
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11678
      except (TypeError, ValueError), err:
11679
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11680

    
11681
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11682
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11683
                                   errors.ECODE_INVAL)
11684

    
11685
      # Load and verify CA
11686
      try:
11687
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11688
      except OpenSSL.crypto.Error, err:
11689
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11690
                                   (err, ), errors.ECODE_INVAL)
11691

    
11692
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11693
      if errcode is not None:
11694
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11695
                                   (msg, ), errors.ECODE_INVAL)
11696

    
11697
      self.dest_x509_ca = cert
11698

    
11699
      # Verify target information
11700
      disk_info = []
11701
      for idx, disk_data in enumerate(self.op.target_node):
11702
        try:
11703
          (host, port, magic) = \
11704
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11705
        except errors.GenericError, err:
11706
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11707
                                     (idx, err), errors.ECODE_INVAL)
11708

    
11709
        disk_info.append((host, port, magic))
11710

    
11711
      assert len(disk_info) == len(self.op.target_node)
11712
      self.dest_disk_info = disk_info
11713

    
11714
    else:
11715
      raise errors.ProgrammerError("Unhandled export mode %r" %
11716
                                   self.op.mode)
11717

    
11718
    # instance disk type verification
11719
    # TODO: Implement export support for file-based disks
11720
    for disk in self.instance.disks:
11721
      if disk.dev_type == constants.LD_FILE:
11722
        raise errors.OpPrereqError("Export not supported for instances with"
11723
                                   " file-based disks", errors.ECODE_INVAL)
11724

    
11725
  def _CleanupExports(self, feedback_fn):
11726
    """Removes exports of current instance from all other nodes.
11727

11728
    If an instance in a cluster with nodes A..D was exported to node C, its
11729
    exports will be removed from the nodes A, B and D.
11730

11731
    """
11732
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11733

    
11734
    nodelist = self.cfg.GetNodeList()
11735
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal;
    # if we proceeded, the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
11741
    if nodelist:
11742
      feedback_fn("Removing old exports for instance %s" % iname)
11743
      exportlist = self.rpc.call_export_list(nodelist)
11744
      for node in exportlist:
11745
        if exportlist[node].fail_msg:
11746
          continue
11747
        if iname in exportlist[node].payload:
11748
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11749
          if msg:
11750
            self.LogWarning("Could not remove older export for instance %s"
11751
                            " on node %s: %s", iname, node, msg)
11752

    
11753
  def Exec(self, feedback_fn):
11754
    """Export an instance to an image in the cluster.
11755

11756
    """
11757
    assert self.op.mode in constants.EXPORT_MODES
11758

    
11759
    instance = self.instance
11760
    src_node = instance.primary_node
11761

    
11762
    if self.op.shutdown:
11763
      # shutdown the instance, but not the disks
11764
      feedback_fn("Shutting down instance %s" % instance.name)
11765
      result = self.rpc.call_instance_shutdown(src_node, instance,
11766
                                               self.op.shutdown_timeout)
11767
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11768
      result.Raise("Could not shutdown instance %s on"
11769
                   " node %s" % (instance.name, src_node))
11770

    
11771
    # set the disks ID correctly since call_instance_start needs the
11772
    # correct drbd minor to create the symlinks
11773
    for disk in instance.disks:
11774
      self.cfg.SetDiskID(disk, src_node)
11775

    
11776
    activate_disks = (not instance.admin_up)
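    # a stopped instance has no active disks; activate them for the export
    # and deactivate them again in the "finally" block below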
11777

    
11778
    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
11781
      _StartInstanceDisks(self, instance, None)
11782

    
11783
    try:
11784
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11785
                                                     instance)
11786

    
11787
      helper.CreateSnapshots()
11788
      try:
11789
        if (self.op.shutdown and instance.admin_up and
11790
            not self.op.remove_instance):
11791
          assert not activate_disks
11792
          feedback_fn("Starting instance %s" % instance.name)
11793
          result = self.rpc.call_instance_start(src_node, instance,
11794
                                                None, None, False)
11795
          msg = result.fail_msg
11796
          if msg:
11797
            feedback_fn("Failed to start instance: %s" % msg)
11798
            _ShutdownInstanceDisks(self, instance)
11799
            raise errors.OpExecError("Could not start instance: %s" % msg)
11800

    
11801
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11802
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11803
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11804
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11805
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11806

    
11807
          (key_name, _, _) = self.x509_key_name
11808

    
11809
          dest_ca_pem = \
11810
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11811
                                            self.dest_x509_ca)
11812

    
11813
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11814
                                                     key_name, dest_ca_pem,
11815
                                                     timeouts)
11816
      finally:
11817
        helper.Cleanup()
11818

    
11819
      # Check for backwards compatibility
11820
      assert len(dresults) == len(instance.disks)
11821
      assert compat.all(isinstance(i, bool) for i in dresults), \
11822
             "Not all results are boolean: %r" % dresults
11823

    
11824
    finally:
11825
      if activate_disks:
11826
        feedback_fn("Deactivating disks for %s" % instance.name)
11827
        _ShutdownInstanceDisks(self, instance)
11828

    
11829
    if not (compat.all(dresults) and fin_resu):
11830
      failures = []
11831
      if not fin_resu:
11832
        failures.append("export finalization")
11833
      if not compat.all(dresults):
11834
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11835
                               if not dsk)
11836
        failures.append("disk export: disk(s) %s" % fdsk)
11837

    
11838
      raise errors.OpExecError("Export failed, errors in %s" %
11839
                               utils.CommaJoin(failures))
11840

    
11841
    # At this point, the export was successful, we can cleanup/finish
11842

    
11843
    # Remove instance if requested
11844
    if self.op.remove_instance:
11845
      feedback_fn("Removing instance %s" % instance.name)
11846
      _RemoveInstance(self, feedback_fn, instance,
11847
                      self.op.ignore_remove_failures)
11848

    
11849
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11850
      self._CleanupExports(feedback_fn)
11851

    
11852
    return fin_resu, dresults


class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
11862
    self.needed_locks = {}
11863
    # We need all nodes to be locked in order for RemoveExport to work, but we
11864
    # don't need to lock the instance itself, as nothing will happen to it (and
11865
    # we can remove exports also for a removed instance)
11866
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11867

    
11868
  def Exec(self, feedback_fn):
11869
    """Remove any export.
11870

11871
    """
11872
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11873
    # If the instance was not found we'll try with the name that was passed in.
11874
    # This will only work if it was an FQDN, though.
11875
    fqdn_warn = False
11876
    if not instance_name:
11877
      fqdn_warn = True
11878
      instance_name = self.op.instance_name
11879

    
11880
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11881
    exportlist = self.rpc.call_export_list(locked_nodes)
11882
    found = False
11883
    for node in exportlist:
11884
      msg = exportlist[node].fail_msg
11885
      if msg:
11886
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11887
        continue
11888
      if instance_name in exportlist[node].payload:
11889
        found = True
11890
        result = self.rpc.call_export_remove(node, instance_name)
11891
        msg = result.fail_msg
11892
        if msg:
11893
          logging.error("Could not remove export for instance %s"
11894
                        " on node %s: %s", instance_name, node, msg)
11895

    
11896
    if fqdn_warn and not found:
11897
      feedback_fn("Export not found. If trying to remove an export belonging"
11898
                  " to a deleted instance please use its Fully Qualified"
11899
                  " Domain Name.")


class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
11911
    # We need the new group's UUID here so that we can create and acquire the
11912
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11913
    # that it should not check whether the UUID exists in the configuration.
11914
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11915
    self.needed_locks = {}
11916
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11917

    
11918
  def CheckPrereq(self):
11919
    """Check prerequisites.
11920

11921
    This checks that the given group name is not an existing node group
11922
    already.
11923

11924
    """
11925
    try:
11926
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11927
    except errors.OpPrereqError:
11928
      pass
11929
    else:
11930
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11931
                                 " node group (UUID: %s)" %
11932
                                 (self.op.group_name, existing_uuid),
11933
                                 errors.ECODE_EXISTS)
11934

    
11935
    if self.op.ndparams:
11936
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11937

    
11938
  def BuildHooksEnv(self):
11939
    """Build hooks env.
11940

11941
    """
11942
    return {
11943
      "GROUP_NAME": self.op.group_name,
11944
      }
11945

    
11946
  def BuildHooksNodes(self):
11947
    """Build hooks nodes.
11948

11949
    """
11950
    mn = self.cfg.GetMasterNode()
11951
    return ([mn], [mn])
11952

    
11953
  def Exec(self, feedback_fn):
11954
    """Add the node group to the cluster.
11955

11956
    """
11957
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11958
                                  uuid=self.group_uuid,
11959
                                  alloc_policy=self.op.alloc_policy,
11960
                                  ndparams=self.op.ndparams)
11961

    
11962
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11963
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
11973
    # These raise errors.OpPrereqError on their own:
11974
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11975
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11976

    
11977
    # We want to lock all the affected nodes and groups. We have readily
11978
    # available the list of nodes, and the *destination* group. To gather the
11979
    # list of "source" groups, we need to fetch node information later on.
11980
    self.needed_locks = {
11981
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11982
      locking.LEVEL_NODE: self.op.nodes,
11983
      }
11984

    
11985
  def DeclareLocks(self, level):
11986
    if level == locking.LEVEL_NODEGROUP:
11987
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11988

    
11989
      # Try to get all affected nodes' groups without having the group or node
11990
      # lock yet. Needs verification later in the code flow.
11991
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11992

    
11993
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11994

    
11995
  def CheckPrereq(self):
11996
    """Check prerequisites.
11997

11998
    """
11999
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12000
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12001
            frozenset(self.op.nodes))
12002

    
12003
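    # node groups were locked optimistically in DeclareLocks; verify that
    # the nodes still belong to exactly the groups we locked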
    expected_locks = (set([self.group_uuid]) |
12004
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12005
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12006
    if actual_locks != expected_locks:
12007
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12008
                               " current groups are '%s', used to be '%s'" %
12009
                               (utils.CommaJoin(expected_locks),
12010
                                utils.CommaJoin(actual_locks)))
12011

    
12012
    self.node_data = self.cfg.GetAllNodesInfo()
12013
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12014
    instance_data = self.cfg.GetAllInstancesInfo()
12015

    
12016
    if self.group is None:
12017
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12018
                               (self.op.group_name, self.group_uuid))
12019

    
12020
    (new_splits, previous_splits) = \
12021
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12022
                                             for node in self.op.nodes],
12023
                                            self.node_data, instance_data)
12024

    
12025
    if new_splits:
12026
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12027

    
12028
      if not self.op.force:
12029
        raise errors.OpExecError("The following instances get split by this"
12030
                                 " change and --force was not given: %s" %
12031
                                 fmt_new_splits)
12032
      else:
12033
        self.LogWarning("This operation will split the following instances: %s",
12034
                        fmt_new_splits)
12035

    
12036
        if previous_splits:
12037
          self.LogWarning("In addition, these already-split instances continue"
12038
                          " to be split across groups: %s",
12039
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12040

    
12041
  def Exec(self, feedback_fn):
12042
    """Assign nodes to a new group.
12043

12044
    """
12045
    for node in self.op.nodes:
12046
      self.node_data[node].group = self.group_uuid
12047

    
12048
    # FIXME: Depends on side-effects of modifying the result of
12049
    # C{cfg.GetAllNodesInfo}
12050

    
12051
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12052

    
12053
  @staticmethod
12054
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12055
    """Check for split instances after a node assignment.
12056

12057
    This method considers a series of node assignments as an atomic operation,
12058
    and returns information about split instances after applying the set of
12059
    changes.
12060

12061
    In particular, it returns information about newly split instances, and
12062
    instances that were already split, and remain so after the change.
12063

12064
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12065
    considered.
12066

12067
    @type changes: list of (node_name, new_group_uuid) pairs.
12068
    @param changes: list of node assignments to consider.
12069
    @param node_data: a dict with data for all nodes
12070
    @param instance_data: a dict with all instances to consider
12071
    @rtype: a two-tuple
12072
    @return: a list of instances that were previously okay and result split as a
12073
      consequence of this change, and a list of instances that were previously
12074
      split and this change does not fix.
12075

12076
    """
12077
    changed_nodes = dict((node, group) for node, group in changes
12078
                         if node_data[node].group != group)
12079

    
12080
    all_split_instances = set()
12081
    previously_split_instances = set()
12082

    
12083
    def InstanceNodes(instance):
12084
      return [instance.primary_node] + list(instance.secondary_nodes)
12085

    
12086
    for inst in instance_data.values():
12087
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12088
        continue
12089

    
12090
      instance_nodes = InstanceNodes(inst)
12091

    
12092
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12093
        previously_split_instances.add(inst.name)
12094

    
12095
      if len(set(changed_nodes.get(node, node_data[node].group)
12096
                 for node in instance_nodes)) > 1:
12097
        all_split_instances.add(inst.name)
12098

    
12099
    return (list(all_split_instances - previously_split_instances),
12100
            list(previously_split_instances & all_split_instances))


class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
12107
    lu.needed_locks = {}
12108

    
12109
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12110
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12111

    
12112
    if not self.names:
12113
      self.wanted = [name_to_uuid[name]
12114
                     for name in utils.NiceSort(name_to_uuid.keys())]
12115
    else:
12116
      # Accept names to be either names or UUIDs.
12117
      missing = []
12118
      self.wanted = []
12119
      all_uuid = frozenset(self._all_groups.keys())
12120

    
12121
      for name in self.names:
12122
        if name in all_uuid:
12123
          self.wanted.append(name)
12124
        elif name in name_to_uuid:
12125
          self.wanted.append(name_to_uuid[name])
12126
        else:
12127
          missing.append(name)
12128

    
12129
      if missing:
12130
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12131
                                   utils.CommaJoin(missing),
12132
                                   errors.ECODE_NOENT)
12133

    
12134
  def DeclareLocks(self, lu, level):
12135
    pass
12136

    
12137
  def _GetQueryData(self, lu):
12138
    """Computes the list of node groups and their attributes.
12139

12140
    """
12141
    do_nodes = query.GQ_NODE in self.requested_data
12142
    do_instances = query.GQ_INST in self.requested_data
12143

    
12144
    group_to_nodes = None
12145
    group_to_instances = None
12146

    
12147
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12148
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12149
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12150
    # instance->node. Hence, we will need to process nodes even if we only need
12151
    # instance information.
12152
    if do_nodes or do_instances:
12153
      all_nodes = lu.cfg.GetAllNodesInfo()
12154
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12155
      node_to_group = {}
12156

    
12157
      for node in all_nodes.values():
12158
        if node.group in group_to_nodes:
12159
          group_to_nodes[node.group].append(node.name)
12160
          node_to_group[node.name] = node.group
12161

    
12162
      if do_instances:
12163
        all_instances = lu.cfg.GetAllInstancesInfo()
12164
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12165

    
12166
        for instance in all_instances.values():
12167
          node = instance.primary_node
12168
          if node in node_to_group:
12169
            group_to_instances[node_to_group[node]].append(instance.name)
12170

    
12171
        if not do_nodes:
12172
          # Do not pass on node information if it was not requested.
12173
          group_to_nodes = None
12174

    
12175
    return query.GroupQueryData([self._all_groups[uuid]
12176
                                 for uuid in self.wanted],
12177
                                group_to_nodes, group_to_instances)
12178

    
12179

    
12180
class LUGroupQuery(NoHooksLU):
12181
  """Logical unit for querying node groups.
12182

12183
  """
12184
  REQ_BGL = False
12185

    
12186
  def CheckArguments(self):
12187
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12188
                          self.op.output_fields, False)
12189

    
12190
  def ExpandNames(self):
12191
    self.gq.ExpandNames(self)
12192

    
12193
  def DeclareLocks(self, level):
12194
    self.gq.DeclareLocks(self, level)
12195

    
12196
  def Exec(self, feedback_fn):
12197
    return self.gq.OldStyleQuery(self)
12198

    
12199

    
12200
class LUGroupSetParams(LogicalUnit):
12201
  """Modifies the parameters of a node group.
12202

12203
  """
12204
  HPATH = "group-modify"
12205
  HTYPE = constants.HTYPE_GROUP
12206
  REQ_BGL = False
12207

    
12208
  def CheckArguments(self):
12209
    all_changes = [
12210
      self.op.ndparams,
12211
      self.op.alloc_policy,
12212
      ]
12213

    
12214
    if all_changes.count(None) == len(all_changes):
12215
      raise errors.OpPrereqError("Please pass at least one modification",
12216
                                 errors.ECODE_INVAL)
12217

    
12218
  def ExpandNames(self):
12219
    # This raises errors.OpPrereqError on its own:
12220
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12221

    
12222
    self.needed_locks = {
12223
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12224
      }
12225

    
12226
  def CheckPrereq(self):
12227
    """Check prerequisites.
12228

12229
    """
12230
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12231

    
12232
    if self.group is None:
12233
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12234
                               (self.op.group_name, self.group_uuid))
12235

    
12236
    if self.op.ndparams:
12237
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12238
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12239
      self.new_ndparams = new_ndparams
12240

    
12241
  def BuildHooksEnv(self):
12242
    """Build hooks env.
12243

12244
    """
12245
    return {
12246
      "GROUP_NAME": self.op.group_name,
12247
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12248
      }
12249

    
12250
  def BuildHooksNodes(self):
12251
    """Build hooks nodes.
12252

12253
    """
12254
    mn = self.cfg.GetMasterNode()
12255
    return ([mn], [mn])
12256

    
12257
  def Exec(self, feedback_fn):
12258
    """Modifies the node group.
12259

12260
    """
12261
    result = []
12262

    
12263
    if self.op.ndparams:
12264
      self.group.ndparams = self.new_ndparams
12265
      result.append(("ndparams", str(self.group.ndparams)))
12266

    
12267
    if self.op.alloc_policy:
12268
      self.group.alloc_policy = self.op.alloc_policy
12269

    
12270
    self.cfg.Update(self.group, feedback_fn)
12271
    return result
12272

    
12273

    
12274
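# Note on LUGroupSetParams.Exec above -- an illustrative sketch, not part of
# the upstream module: the returned list pairs each reported attribute with
# its new value, e.g. [("ndparams", "{'oob_program': None}")]; the concrete
# ndparams contents shown here are hypothetical.
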
class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures the requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the matching (path, tag) pairs.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


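# Note on LUTagsSearch.Exec above -- an illustrative sketch, not part of the
# upstream module: the result is a list of (path, tag) pairs, e.g.
# [("/cluster", "prod"), ("/instances/inst1.example.com", "prod")] for a
# pattern such as "^prod$"; the names and tags shown here are hypothetical.
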
class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


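# Note on LUTestDelay above -- an illustrative sketch under the assumption
# that the corresponding opcode is opcodes.OpTestDelay (check opcodes.py):
# something like OpTestDelay(duration=2.5, on_master=True, on_nodes=[],
# repeat=0) would sleep 2.5 seconds on the master only, without repetition.
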
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has several sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

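  # Note on _ComputeNodeGroupData above -- an illustrative sketch, not part
  # of the upstream module: the mapping is keyed by group UUID, e.g.
  # {"f4e06e0f-hypothetical-uuid": {"name": "default",
  #                                 "alloc_policy": "preferred"}},
  # i.e. only the group name and allocation policy are exported.
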
  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

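  # Note on _BuildInputData above -- an illustrative sketch, not a verbatim
  # dump: after it runs, self.in_data is roughly
  #   {"version": ..., "cluster_name": ..., "cluster_tags": [...],
  #    "enabled_hypervisors": [...], "nodegroups": {...}, "nodes": {...},
  #    "instances": {...}, "request": {...}}
  # and self.in_text is its serialized form as handed to the external script.
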
  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


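# Note on IAllocator._NodesToGroups above -- an illustrative sketch, not part
# of the upstream module, with made-up names: for
# node2group = {"node1": "uuid-A", "node2": "uuid-B"} and
# groups = {"uuid-A": {"name": "default"}}, passing
# nodes = ["node1", "node2", "node9"] yields ["default", "uuid-B"]: "node9"
# is unknown and skipped, and "uuid-B" falls back to the UUID because it has
# no entry in groups.
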
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
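
# Note on _GetQueryImplementation above -- an illustrative usage, not part of
# the upstream module: _GetQueryImplementation(constants.QR_GROUP) returns
# the _GroupQuery class, while an unknown resource name raises
# errors.OpPrereqError with errors.ECODE_INVAL.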